Blame - arch/arm/nwfpe/softfloat.c - kernel/msm-4.9

blob: e038dd3be9b3c63e019a5be3f77de94f25f4c949 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	===============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point
				5	Arithmetic Package, Release 2.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				15	arithmetic/softfloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				18	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				19	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				20	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				21	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				22
				23	Derivative works are acceptable, even for commercial purposes, so long as
				24	(1) they include prominent notice that the work is derivative, and (2) they
				25	include prominent notice akin to these three paragraphs for those parts of
				26	this code that are retained.
				27
				28	===============================================================================
				29	*/
				30
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	31	#include <asm/div64.h>
				32
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "fpa11.h"
				34	//#include "milieu.h"
				35	//#include "softfloat.h"
				36
				37	/*
				38	-------------------------------------------------------------------------------
				39	Floating-point rounding mode, extended double-precision rounding precision,
				40	and exception flags.
				41	-------------------------------------------------------------------------------
				42	*/
				43	int8 float_rounding_mode = float_round_nearest_even;
				44	int8 floatx80_rounding_precision = 80;
				45	int8 float_exception_flags;
				46
				47	/*
				48	-------------------------------------------------------------------------------
				49	Primitive arithmetic functions, including multi-word arithmetic, and
				50	division and square root approximations. (Can be specialized to target if
				51	desired.)
				52	-------------------------------------------------------------------------------
				53	*/
				54	#include "softfloat-macros"
				55
				56	/*
				57	-------------------------------------------------------------------------------
				58	Functions and definitions to determine: (1) whether tininess for underflow
				59	is detected before or after rounding by default, (2) what (if anything)
				60	happens when exceptions are raised, (3) how signaling NaNs are distinguished
				61	from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				62	are propagated from function inputs to output. These details are target-
				63	specific.
				64	-------------------------------------------------------------------------------
				65	*/
				66	#include "softfloat-specialize"
				67
				68	/*
				69	-------------------------------------------------------------------------------
				70	Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				71	and 7, and returns the properly rounded 32-bit integer corresponding to the
				72	input. If `zSign' is nonzero, the input is negated before being converted
				73	to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
				74	input is simply rounded to an integer, with the inexact exception raised if
				75	the input cannot be represented exactly as an integer. If the fixed-point
				76	input is too large, however, the invalid exception is raised and the largest
				77	positive or negative integer is returned.
				78	-------------------------------------------------------------------------------
				79	*/
				80	static int32 roundAndPackInt32( flag zSign, bits64 absZ )
				81	{
				82	int8 roundingMode;
				83	flag roundNearestEven;
				84	int8 roundIncrement, roundBits;
				85	int32 z;
				86
				87	roundingMode = float_rounding_mode;
				88	roundNearestEven = ( roundingMode == float_round_nearest_even );
				89	roundIncrement = 0x40;
				90	if ( ! roundNearestEven ) {
				91	if ( roundingMode == float_round_to_zero ) {
				92	roundIncrement = 0;
				93	}
				94	else {
				95	roundIncrement = 0x7F;
				96	if ( zSign ) {
				97	if ( roundingMode == float_round_up ) roundIncrement = 0;
				98	}
				99	else {
				100	if ( roundingMode == float_round_down ) roundIncrement = 0;
				101	}
				102	}
				103	}
				104	roundBits = absZ & 0x7F;
				105	absZ = ( absZ + roundIncrement )>>7;
				106	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				107	z = absZ;
				108	if ( zSign ) z = - z;
				109	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				110	float_exception_flags \|= float_flag_invalid;
				111	return zSign ? 0x80000000 : 0x7FFFFFFF;
				112	}
				113	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				114	return z;
				115
				116	}
				117
				118	/*
				119	-------------------------------------------------------------------------------
				120	Returns the fraction bits of the single-precision floating-point value `a'.
				121	-------------------------------------------------------------------------------
				122	*/
				123	INLINE bits32 extractFloat32Frac( float32 a )
				124	{
				125
				126	return a & 0x007FFFFF;
				127
				128	}
				129
				130	/*
				131	-------------------------------------------------------------------------------
				132	Returns the exponent bits of the single-precision floating-point value `a'.
				133	-------------------------------------------------------------------------------
				134	*/
				135	INLINE int16 extractFloat32Exp( float32 a )
				136	{
				137
				138	return ( a>>23 ) & 0xFF;
				139
				140	}
				141
				/*
				-------------------------------------------------------------------------------
				Extracts the sign bit (bit 31) of the single-precision floating-point value
				`a'.  This copy is compiled out; the `#if 0' note says the definition lives
				in softfloat.h.
				-------------------------------------------------------------------------------
				*/
				#if 0 /* in softfloat.h */
				INLINE flag extractFloat32Sign( float32 a )
				{
					const flag signBit = a >> 31;

					return signBit;
				}
				#endif
				155
				156	/*
				157	-------------------------------------------------------------------------------
				158	Normalizes the subnormal single-precision floating-point value represented
				159	by the denormalized significand `aSig'. The normalized exponent and
				160	significand are stored at the locations pointed to by `zExpPtr' and
				161	`zSigPtr', respectively.
				162	-------------------------------------------------------------------------------
				163	*/
				164	static void
				165	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				166	{
				167	int8 shiftCount;
				168
				169	shiftCount = countLeadingZeros32( aSig ) - 8;
				170	*zSigPtr = aSig<<shiftCount;
				171	*zExpPtr = 1 - shiftCount;
				172
				173	}
				174
				175	/*
				176	-------------------------------------------------------------------------------
				177	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				178	single-precision floating-point value, returning the result. After being
				179	shifted into the proper positions, the three fields are simply added
				180	together to form the result. This means that any integer portion of `zSig'
				181	will be added into the exponent. Since a properly normalized significand
				182	will have an integer portion equal to 1, the `zExp' input should be 1 less
				183	than the desired result exponent whenever `zSig' is a complete, normalized
				184	significand.
				185	-------------------------------------------------------------------------------
				186	*/
				187	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				188	{
				189	#if 0
				190	float32 f;
				191	__asm__("@ packFloat32 \n\
				192	mov %0, %1, asl #31 \n\
				193	orr %0, %2, asl #23 \n\
				194	orr %0, %3"
				195	: /* no outputs */
				196	: "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
				197	: "cc");
				198	return f;
				199	#else
				200	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
				201	#endif
				202	}
				203
				204	/*
				205	-------------------------------------------------------------------------------
				206	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				207	and significand `zSig', and returns the proper single-precision floating-
				208	point value corresponding to the abstract input. Ordinarily, the abstract
				209	value is simply rounded and packed into the single-precision format, with
				210	the inexact exception raised if the abstract input cannot be represented
				211	exactly. If the abstract value is too large, however, the overflow and
				212	inexact exceptions are raised and an infinity or maximal finite value is
				213	returned. If the abstract value is too small, the input value is rounded to
				214	a subnormal number, and the underflow and inexact exceptions are raised if
				215	the abstract input cannot be represented exactly as a subnormal single-
				216	precision floating-point number.
				217	The input significand `zSig' has its binary point between bits 30
				218	and 29, which is 7 bits to the left of the usual location. This shifted
				219	significand must be normalized or smaller. If `zSig' is not normalized,
				220	`zExp' must be 0; in that case, the result returned is a subnormal number,
				221	and it must not require rounding. In the usual case that `zSig' is
				222	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				223	The handling of underflow and overflow follows the IEC/IEEE Standard for
				224	Binary Floating-point Arithmetic.
				225	-------------------------------------------------------------------------------
				226	*/
				227	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				228	{
				229	int8 roundingMode;
				230	flag roundNearestEven;
				231	int8 roundIncrement, roundBits;
				232	flag isTiny;
				233
				234	roundingMode = float_rounding_mode;
				235	roundNearestEven = ( roundingMode == float_round_nearest_even );
				236	roundIncrement = 0x40;
				237	if ( ! roundNearestEven ) {
				238	if ( roundingMode == float_round_to_zero ) {
				239	roundIncrement = 0;
				240	}
				241	else {
				242	roundIncrement = 0x7F;
				243	if ( zSign ) {
				244	if ( roundingMode == float_round_up ) roundIncrement = 0;
				245	}
				246	else {
				247	if ( roundingMode == float_round_down ) roundIncrement = 0;
				248	}
				249	}
				250	}
				251	roundBits = zSig & 0x7F;
				252	if ( 0xFD <= (bits16) zExp ) {
				253	if ( ( 0xFD < zExp )
				254	\|\| ( ( zExp == 0xFD )
				255	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				256	) {
				257	float_raise( float_flag_overflow \| float_flag_inexact );
				258	return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
				259	}
				260	if ( zExp < 0 ) {
				261	isTiny =
				262	( float_detect_tininess == float_tininess_before_rounding )
				263	\|\| ( zExp < -1 )
				264	\|\| ( zSig + roundIncrement < 0x80000000 );
				265	shift32RightJamming( zSig, - zExp, &zSig );
				266	zExp = 0;
				267	roundBits = zSig & 0x7F;
				268	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				269	}
				270	}
				271	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				272	zSig = ( zSig + roundIncrement )>>7;
				273	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				274	if ( zSig == 0 ) zExp = 0;
				275	return packFloat32( zSign, zExp, zSig );
				276
				277	}
				278
				279	/*
				280	-------------------------------------------------------------------------------
				281	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				282	and significand `zSig', and returns the proper single-precision floating-
				283	point value corresponding to the abstract input. This routine is just like
				284	`roundAndPackFloat32' except that `zSig' does not have to be normalized in
				285	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				286	point exponent.
				287	-------------------------------------------------------------------------------
				288	*/
				289	static float32
				290	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				291	{
				292	int8 shiftCount;
				293
				294	shiftCount = countLeadingZeros32( zSig ) - 1;
				295	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
				296
				297	}
				298
				299	/*
				300	-------------------------------------------------------------------------------
				301	Returns the fraction bits of the double-precision floating-point value `a'.
				302	-------------------------------------------------------------------------------
				303	*/
				304	INLINE bits64 extractFloat64Frac( float64 a )
				305	{
				306
				307	return a & LIT64( 0x000FFFFFFFFFFFFF );
				308
				309	}
				310
				311	/*
				312	-------------------------------------------------------------------------------
				313	Returns the exponent bits of the double-precision floating-point value `a'.
				314	-------------------------------------------------------------------------------
				315	*/
				316	INLINE int16 extractFloat64Exp( float64 a )
				317	{
				318
				319	return ( a>>52 ) & 0x7FF;
				320
				321	}
				322
				/*
				-------------------------------------------------------------------------------
				Extracts the sign bit (bit 63) of the double-precision floating-point value
				`a'.  This copy is compiled out; the `#if 0' note says the definition lives
				in softfloat.h.
				-------------------------------------------------------------------------------
				*/
				#if 0 /* in softfloat.h */
				INLINE flag extractFloat64Sign( float64 a )
				{
					const flag signBit = a >> 63;

					return signBit;
				}
				#endif
				336
				337	/*
				338	-------------------------------------------------------------------------------
				339	Normalizes the subnormal double-precision floating-point value represented
				340	by the denormalized significand `aSig'. The normalized exponent and
				341	significand are stored at the locations pointed to by `zExpPtr' and
				342	`zSigPtr', respectively.
				343	-------------------------------------------------------------------------------
				344	*/
				345	static void
				346	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				347	{
				348	int8 shiftCount;
				349
				350	shiftCount = countLeadingZeros64( aSig ) - 11;
				351	*zSigPtr = aSig<<shiftCount;
				352	*zExpPtr = 1 - shiftCount;
				353
				354	}
				355
				356	/*
				357	-------------------------------------------------------------------------------
				358	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				359	double-precision floating-point value, returning the result. After being
				360	shifted into the proper positions, the three fields are simply added
				361	together to form the result. This means that any integer portion of `zSig'
				362	will be added into the exponent. Since a properly normalized significand
				363	will have an integer portion equal to 1, the `zExp' input should be 1 less
				364	than the desired result exponent whenever `zSig' is a complete, normalized
				365	significand.
				366	-------------------------------------------------------------------------------
				367	*/
				368	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				369	{
				370
				371	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
				372
				373	}
				374
				375	/*
				376	-------------------------------------------------------------------------------
				377	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				378	and significand `zSig', and returns the proper double-precision floating-
				379	point value corresponding to the abstract input. Ordinarily, the abstract
				380	value is simply rounded and packed into the double-precision format, with
				381	the inexact exception raised if the abstract input cannot be represented
				382	exactly. If the abstract value is too large, however, the overflow and
				383	inexact exceptions are raised and an infinity or maximal finite value is
				384	returned. If the abstract value is too small, the input value is rounded to
				385	a subnormal number, and the underflow and inexact exceptions are raised if
				386	the abstract input cannot be represented exactly as a subnormal double-
				387	precision floating-point number.
				388	The input significand `zSig' has its binary point between bits 62
				389	and 61, which is 10 bits to the left of the usual location. This shifted
				390	significand must be normalized or smaller. If `zSig' is not normalized,
				391	`zExp' must be 0; in that case, the result returned is a subnormal number,
				392	and it must not require rounding. In the usual case that `zSig' is
				393	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				394	The handling of underflow and overflow follows the IEC/IEEE Standard for
				395	Binary Floating-point Arithmetic.
				396	-------------------------------------------------------------------------------
				397	*/
				398	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				399	{
				400	int8 roundingMode;
				401	flag roundNearestEven;
				402	int16 roundIncrement, roundBits;
				403	flag isTiny;
				404
				405	roundingMode = float_rounding_mode;
				406	roundNearestEven = ( roundingMode == float_round_nearest_even );
				407	roundIncrement = 0x200;
				408	if ( ! roundNearestEven ) {
				409	if ( roundingMode == float_round_to_zero ) {
				410	roundIncrement = 0;
				411	}
				412	else {
				413	roundIncrement = 0x3FF;
				414	if ( zSign ) {
				415	if ( roundingMode == float_round_up ) roundIncrement = 0;
				416	}
				417	else {
				418	if ( roundingMode == float_round_down ) roundIncrement = 0;
				419	}
				420	}
				421	}
				422	roundBits = zSig & 0x3FF;
				423	if ( 0x7FD <= (bits16) zExp ) {
				424	if ( ( 0x7FD < zExp )
				425	\|\| ( ( zExp == 0x7FD )
				426	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				427	) {
				428	//register int lr = __builtin_return_address(0);
				429	//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
				430	float_raise( float_flag_overflow \| float_flag_inexact );
				431	return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
				432	}
				433	if ( zExp < 0 ) {
				434	isTiny =
				435	( float_detect_tininess == float_tininess_before_rounding )
				436	\|\| ( zExp < -1 )
				437	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				438	shift64RightJamming( zSig, - zExp, &zSig );
				439	zExp = 0;
				440	roundBits = zSig & 0x3FF;
				441	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				442	}
				443	}
				444	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				445	zSig = ( zSig + roundIncrement )>>10;
				446	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				447	if ( zSig == 0 ) zExp = 0;
				448	return packFloat64( zSign, zExp, zSig );
				449
				450	}
				451
				452	/*
				453	-------------------------------------------------------------------------------
				454	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				455	and significand `zSig', and returns the proper double-precision floating-
				456	point value corresponding to the abstract input. This routine is just like
				457	`roundAndPackFloat64' except that `zSig' does not have to be normalized in
				458	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				459	point exponent.
				460	-------------------------------------------------------------------------------
				461	*/
				462	static float64
				463	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				464	{
				465	int8 shiftCount;
				466
				467	shiftCount = countLeadingZeros64( zSig ) - 1;
				468	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
				469
				470	}
				471
				472	#ifdef FLOATX80
				473
				474	/*
				475	-------------------------------------------------------------------------------
				476	Returns the fraction bits of the extended double-precision floating-point
				477	value `a'.
				478	-------------------------------------------------------------------------------
				479	*/
				480	INLINE bits64 extractFloatx80Frac( floatx80 a )
				481	{
				482
				483	return a.low;
				484
				485	}
				486
				487	/*
				488	-------------------------------------------------------------------------------
				489	Returns the exponent bits of the extended double-precision floating-point
				490	value `a'.
				491	-------------------------------------------------------------------------------
				492	*/
				493	INLINE int32 extractFloatx80Exp( floatx80 a )
				494	{
				495
				496	return a.high & 0x7FFF;
				497
				498	}
				499
				500	/*
				501	-------------------------------------------------------------------------------
				502	Returns the sign bit of the extended double-precision floating-point value
				503	`a'.
				504	-------------------------------------------------------------------------------
				505	*/
				506	INLINE flag extractFloatx80Sign( floatx80 a )
				507	{
				508
				509	return a.high>>15;
				510
				511	}
				512
				513	/*
				514	-------------------------------------------------------------------------------
				515	Normalizes the subnormal extended double-precision floating-point value
				516	represented by the denormalized significand `aSig'. The normalized exponent
				517	and significand are stored at the locations pointed to by `zExpPtr' and
				518	`zSigPtr', respectively.
				519	-------------------------------------------------------------------------------
				520	*/
				521	static void
				522	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				523	{
				524	int8 shiftCount;
				525
				526	shiftCount = countLeadingZeros64( aSig );
				527	*zSigPtr = aSig<<shiftCount;
				528	*zExpPtr = 1 - shiftCount;
				529
				530	}
				531
				532	/*
				533	-------------------------------------------------------------------------------
				534	Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				535	extended double-precision floating-point value, returning the result.
				536	-------------------------------------------------------------------------------
				537	*/
				538	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				539	{
				540	floatx80 z;
				541
				542	z.low = zSig;
				543	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				544	return z;
				545
				546	}
				547
				548	/*
				549	-------------------------------------------------------------------------------
				550	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				551	and extended significand formed by the concatenation of `zSig0' and `zSig1',
				552	and returns the proper extended double-precision floating-point value
				553	corresponding to the abstract input. Ordinarily, the abstract value is
				554	rounded and packed into the extended double-precision format, with the
				555	inexact exception raised if the abstract input cannot be represented
				556	exactly. If the abstract value is too large, however, the overflow and
				557	inexact exceptions are raised and an infinity or maximal finite value is
				558	returned. If the abstract value is too small, the input value is rounded to
				559	a subnormal number, and the underflow and inexact exceptions are raised if
				560	the abstract input cannot be represented exactly as a subnormal extended
				561	double-precision floating-point number.
				562	If `roundingPrecision' is 32 or 64, the result is rounded to the same
				563	number of bits as single or double precision, respectively. Otherwise, the
				564	result is rounded to the full precision of the extended double-precision
				565	format.
				566	The input significand must be normalized or smaller. If the input
				567	significand is not normalized, `zExp' must be 0; in that case, the result
				568	returned is a subnormal number, and it must not require rounding. The
				569	handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				570	Floating-point Arithmetic.
				571	-------------------------------------------------------------------------------
				572	*/
				573	static floatx80
				574	roundAndPackFloatx80(
				575	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				576	)
				577	{
				578	int8 roundingMode;
				579	flag roundNearestEven, increment, isTiny;
				580	int64 roundIncrement, roundMask, roundBits;
				581
				582	roundingMode = float_rounding_mode;
				583	roundNearestEven = ( roundingMode == float_round_nearest_even );
				584	if ( roundingPrecision == 80 ) goto precision80;
				585	if ( roundingPrecision == 64 ) {
				586	roundIncrement = LIT64( 0x0000000000000400 );
				587	roundMask = LIT64( 0x00000000000007FF );
				588	}
				589	else if ( roundingPrecision == 32 ) {
				590	roundIncrement = LIT64( 0x0000008000000000 );
				591	roundMask = LIT64( 0x000000FFFFFFFFFF );
				592	}
				593	else {
				594	goto precision80;
				595	}
				596	zSig0 \|= ( zSig1 != 0 );
				597	if ( ! roundNearestEven ) {
				598	if ( roundingMode == float_round_to_zero ) {
				599	roundIncrement = 0;
				600	}
				601	else {
				602	roundIncrement = roundMask;
				603	if ( zSign ) {
				604	if ( roundingMode == float_round_up ) roundIncrement = 0;
				605	}
				606	else {
				607	if ( roundingMode == float_round_down ) roundIncrement = 0;
				608	}
				609	}
				610	}
				611	roundBits = zSig0 & roundMask;
				612	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				613	if ( ( 0x7FFE < zExp )
				614	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				615	) {
				616	goto overflow;
				617	}
				618	if ( zExp <= 0 ) {
				619	isTiny =
				620	( float_detect_tininess == float_tininess_before_rounding )
				621	\|\| ( zExp < 0 )
				622	\|\| ( zSig0 <= zSig0 + roundIncrement );
				623	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				624	zExp = 0;
				625	roundBits = zSig0 & roundMask;
				626	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				627	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				628	zSig0 += roundIncrement;
				629	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				630	roundIncrement = roundMask + 1;
				631	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				632	roundMask \|= roundIncrement;
				633	}
				634	zSig0 &= ~ roundMask;
				635	return packFloatx80( zSign, zExp, zSig0 );
				636	}
				637	}
				638	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				639	zSig0 += roundIncrement;
				640	if ( zSig0 < roundIncrement ) {
				641	++zExp;
				642	zSig0 = LIT64( 0x8000000000000000 );
				643	}
				644	roundIncrement = roundMask + 1;
				645	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				646	roundMask \|= roundIncrement;
				647	}
				648	zSig0 &= ~ roundMask;
				649	if ( zSig0 == 0 ) zExp = 0;
				650	return packFloatx80( zSign, zExp, zSig0 );
				651	precision80:
				652	increment = ( (sbits64) zSig1 < 0 );
				653	if ( ! roundNearestEven ) {
				654	if ( roundingMode == float_round_to_zero ) {
				655	increment = 0;
				656	}
				657	else {
				658	if ( zSign ) {
				659	increment = ( roundingMode == float_round_down ) && zSig1;
				660	}
				661	else {
				662	increment = ( roundingMode == float_round_up ) && zSig1;
				663	}
				664	}
				665	}
				666	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				667	if ( ( 0x7FFE < zExp )
				668	\|\| ( ( zExp == 0x7FFE )
				669	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				670	&& increment
				671	)
				672	) {
				673	roundMask = 0;
				674	overflow:
				675	float_raise( float_flag_overflow \| float_flag_inexact );
				676	if ( ( roundingMode == float_round_to_zero )
				677	\|\| ( zSign && ( roundingMode == float_round_up ) )
				678	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				679	) {
				680	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				681	}
				682	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				683	}
				684	if ( zExp <= 0 ) {
				685	isTiny =
				686	( float_detect_tininess == float_tininess_before_rounding )
				687	\|\| ( zExp < 0 )
				688	\|\| ! increment
				689	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				690	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				691	zExp = 0;
				692	if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
				693	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				694	if ( roundNearestEven ) {
				695	increment = ( (sbits64) zSig1 < 0 );
				696	}
				697	else {
				698	if ( zSign ) {
				699	increment = ( roundingMode == float_round_down ) && zSig1;
				700	}
				701	else {
				702	increment = ( roundingMode == float_round_up ) && zSig1;
				703	}
				704	}
				705	if ( increment ) {
				706	++zSig0;
				707	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				708	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				709	}
				710	return packFloatx80( zSign, zExp, zSig0 );
				711	}
				712	}
				713	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				714	if ( increment ) {
				715	++zSig0;
				716	if ( zSig0 == 0 ) {
				717	++zExp;
				718	zSig0 = LIT64( 0x8000000000000000 );
				719	}
				720	else {
				721	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				722	}
				723	}
				724	else {
				725	if ( zSig0 == 0 ) zExp = 0;
				726	}
				727
				728	return packFloatx80( zSign, zExp, zSig0 );
				729	}
				730
				731	/*
				732	-------------------------------------------------------------------------------
				733	Takes an abstract floating-point value having sign `zSign', exponent
				734	`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				735	and returns the proper extended double-precision floating-point value
				736	corresponding to the abstract input. This routine is just like
				737	`roundAndPackFloatx80' except that the input significand does not have to be
				738	normalized.
				739	-------------------------------------------------------------------------------
				740	*/
				741	static floatx80
				742	normalizeRoundAndPackFloatx80(
				743	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				744	)
				745	{
				746	int8 shiftCount;
				747
				748	if ( zSig0 == 0 ) {
				749	zSig0 = zSig1;
				750	zSig1 = 0;
				751	zExp -= 64;
				752	}
				753	shiftCount = countLeadingZeros64( zSig0 );
				754	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				755	zExp -= shiftCount;
				756	return
				757	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
				758
				759	}
				760
				761	#endif
				762
				763	/*
				764	-------------------------------------------------------------------------------
				765	Returns the result of converting the 32-bit two's complement integer `a' to
				766	the single-precision floating-point format. The conversion is performed
				767	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				768	-------------------------------------------------------------------------------
				769	*/
				770	float32 int32_to_float32( int32 a )
				771	{
				772	flag zSign;
				773
				774	if ( a == 0 ) return 0;
				775	if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				776	zSign = ( a < 0 );
				777	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
				778
				779	}
				780
				781	/*
				782	-------------------------------------------------------------------------------
				783	Returns the result of converting the 32-bit two's complement integer `a' to
				784	the double-precision floating-point format. The conversion is performed
				785	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				786	-------------------------------------------------------------------------------
				787	*/
				788	float64 int32_to_float64( int32 a )
				789	{
				790	flag aSign;
				791	uint32 absA;
				792	int8 shiftCount;
				793	bits64 zSig;
				794
				795	if ( a == 0 ) return 0;
				796	aSign = ( a < 0 );
				797	absA = aSign ? - a : a;
				798	shiftCount = countLeadingZeros32( absA ) + 21;
				799	zSig = absA;
				800	return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
				801
				802	}
				803
				804	#ifdef FLOATX80
				805
				806	/*
				807	-------------------------------------------------------------------------------
				808	Returns the result of converting the 32-bit two's complement integer `a'
				809	to the extended double-precision floating-point format. The conversion
				810	is performed according to the IEC/IEEE Standard for Binary Floating-point
				811	Arithmetic.
				812	-------------------------------------------------------------------------------
				813	*/
				814	floatx80 int32_to_floatx80( int32 a )
				815	{
				816	flag zSign;
				817	uint32 absA;
				818	int8 shiftCount;
				819	bits64 zSig;
				820
				821	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				822	zSign = ( a < 0 );
				823	absA = zSign ? - a : a;
				824	shiftCount = countLeadingZeros32( absA ) + 32;
				825	zSig = absA;
				826	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				827
				828	}
				829
				830	#endif
				831
				832	/*
				833	-------------------------------------------------------------------------------
				834	Returns the result of converting the single-precision floating-point value
				835	`a' to the 32-bit two's complement integer format. The conversion is
				836	performed according to the IEC/IEEE Standard for Binary Floating-point
				837	Arithmetic---which means in particular that the conversion is rounded
				838	according to the current rounding mode. If `a' is a NaN, the largest
				839	positive integer is returned. Otherwise, if the conversion overflows, the
				840	largest integer with the same sign as `a' is returned.
				841	-------------------------------------------------------------------------------
				842	*/
				843	int32 float32_to_int32( float32 a )
				844	{
				845	flag aSign;
				846	int16 aExp, shiftCount;
				847	bits32 aSig;
				848	bits64 zSig;
				849
				850	aSig = extractFloat32Frac( a );
				851	aExp = extractFloat32Exp( a );
				852	aSign = extractFloat32Sign( a );
				853	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				854	if ( aExp ) aSig \|= 0x00800000;
				855	shiftCount = 0xAF - aExp;
				856	zSig = aSig;
				857	zSig <<= 32;
				858	if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
				859	return roundAndPackInt32( aSign, zSig );
				860
				861	}
				862
				863	/*
				864	-------------------------------------------------------------------------------
				865	Returns the result of converting the single-precision floating-point value
				866	`a' to the 32-bit two's complement integer format. The conversion is
				867	performed according to the IEC/IEEE Standard for Binary Floating-point
				868	Arithmetic, except that the conversion is always rounded toward zero. If
				869	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				870	conversion overflows, the largest integer with the same sign as `a' is
				871	returned.
				872	-------------------------------------------------------------------------------
				873	*/
				874	int32 float32_to_int32_round_to_zero( float32 a )
				875	{
				876	flag aSign;
				877	int16 aExp, shiftCount;
				878	bits32 aSig;
				879	int32 z;
				880
				881	aSig = extractFloat32Frac( a );
				882	aExp = extractFloat32Exp( a );
				883	aSign = extractFloat32Sign( a );
				884	shiftCount = aExp - 0x9E;
				885	if ( 0 <= shiftCount ) {
				886	if ( a == 0xCF000000 ) return 0x80000000;
				887	float_raise( float_flag_invalid );
				888	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				889	return 0x80000000;
				890	}
				891	else if ( aExp <= 0x7E ) {
				892	if ( aExp \| aSig ) float_exception_flags \|= float_flag_inexact;
				893	return 0;
				894	}
				895	aSig = ( aSig \| 0x00800000 )<<8;
				896	z = aSig>>( - shiftCount );
				897	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				898	float_exception_flags \|= float_flag_inexact;
				899	}
				900	return aSign ? - z : z;
				901
				902	}
				903
				904	/*
				905	-------------------------------------------------------------------------------
				906	Returns the result of converting the single-precision floating-point value
				907	`a' to the double-precision floating-point format. The conversion is
				908	performed according to the IEC/IEEE Standard for Binary Floating-point
				909	Arithmetic.
				910	-------------------------------------------------------------------------------
				911	*/
				912	float64 float32_to_float64( float32 a )
				913	{
				914	flag aSign;
				915	int16 aExp;
				916	bits32 aSig;
				917
				918	aSig = extractFloat32Frac( a );
				919	aExp = extractFloat32Exp( a );
				920	aSign = extractFloat32Sign( a );
				921	if ( aExp == 0xFF ) {
				922	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
				923	return packFloat64( aSign, 0x7FF, 0 );
				924	}
				925	if ( aExp == 0 ) {
				926	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				927	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				928	--aExp;
				929	}
				930	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				931
				932	}
				933
				934	#ifdef FLOATX80
				935
				936	/*
				937	-------------------------------------------------------------------------------
				938	Returns the result of converting the single-precision floating-point value
				939	`a' to the extended double-precision floating-point format. The conversion
				940	is performed according to the IEC/IEEE Standard for Binary Floating-point
				941	Arithmetic.
				942	-------------------------------------------------------------------------------
				943	*/
				944	floatx80 float32_to_floatx80( float32 a )
				945	{
				946	flag aSign;
				947	int16 aExp;
				948	bits32 aSig;
				949
				950	aSig = extractFloat32Frac( a );
				951	aExp = extractFloat32Exp( a );
				952	aSign = extractFloat32Sign( a );
				953	if ( aExp == 0xFF ) {
				954	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
				955	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				956	}
				957	if ( aExp == 0 ) {
				958	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				959	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				960	}
				961	aSig \|= 0x00800000;
				962	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				963
				964	}
				965
				966	#endif
				967
				968	/*
				969	-------------------------------------------------------------------------------
				970	Rounds the single-precision floating-point value `a' to an integer, and
				971	returns the result as a single-precision floating-point value. The
				972	operation is performed according to the IEC/IEEE Standard for Binary
				973	Floating-point Arithmetic.
				974	-------------------------------------------------------------------------------
				975	*/
				976	float32 float32_round_to_int( float32 a )
				977	{
				978	flag aSign;
				979	int16 aExp;
				980	bits32 lastBitMask, roundBitsMask;
				981	int8 roundingMode;
				982	float32 z;
				983
				984	aExp = extractFloat32Exp( a );
				985	if ( 0x96 <= aExp ) {
				986	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				987	return propagateFloat32NaN( a, a );
				988	}
				989	return a;
				990	}
				991	if ( aExp <= 0x7E ) {
				992	if ( (bits32) ( a<<1 ) == 0 ) return a;
				993	float_exception_flags \|= float_flag_inexact;
				994	aSign = extractFloat32Sign( a );
				995	switch ( float_rounding_mode ) {
				996	case float_round_nearest_even:
				997	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				998	return packFloat32( aSign, 0x7F, 0 );
				999	}
				1000	break;
				1001	case float_round_down:
				1002	return aSign ? 0xBF800000 : 0;
				1003	case float_round_up:
				1004	return aSign ? 0x80000000 : 0x3F800000;
				1005	}
				1006	return packFloat32( aSign, 0, 0 );
				1007	}
				1008	lastBitMask = 1;
				1009	lastBitMask <<= 0x96 - aExp;
				1010	roundBitsMask = lastBitMask - 1;
				1011	z = a;
				1012	roundingMode = float_rounding_mode;
				1013	if ( roundingMode == float_round_nearest_even ) {
				1014	z += lastBitMask>>1;
				1015	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1016	}
				1017	else if ( roundingMode != float_round_to_zero ) {
				1018	if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1019	z += roundBitsMask;
				1020	}
				1021	}
				1022	z &= ~ roundBitsMask;
				1023	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1024	return z;
				1025
				1026	}
				1027
				1028	/*
				1029	-------------------------------------------------------------------------------
				1030	Returns the result of adding the absolute values of the single-precision
				1031	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1032	before being returned. `zSign' is ignored if the result is a NaN. The
				1033	addition is performed according to the IEC/IEEE Standard for Binary
				1034	Floating-point Arithmetic.
				1035	-------------------------------------------------------------------------------
				1036	*/
				1037	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
				1038	{
				1039	int16 aExp, bExp, zExp;
				1040	bits32 aSig, bSig, zSig;
				1041	int16 expDiff;
				1042
				1043	aSig = extractFloat32Frac( a );
				1044	aExp = extractFloat32Exp( a );
				1045	bSig = extractFloat32Frac( b );
				1046	bExp = extractFloat32Exp( b );
				1047	expDiff = aExp - bExp;
				1048	aSig <<= 6;
				1049	bSig <<= 6;
				1050	if ( 0 < expDiff ) {
				1051	if ( aExp == 0xFF ) {
				1052	if ( aSig ) return propagateFloat32NaN( a, b );
				1053	return a;
				1054	}
				1055	if ( bExp == 0 ) {
				1056	--expDiff;
				1057	}
				1058	else {
				1059	bSig \|= 0x20000000;
				1060	}
				1061	shift32RightJamming( bSig, expDiff, &bSig );
				1062	zExp = aExp;
				1063	}
				1064	else if ( expDiff < 0 ) {
				1065	if ( bExp == 0xFF ) {
				1066	if ( bSig ) return propagateFloat32NaN( a, b );
				1067	return packFloat32( zSign, 0xFF, 0 );
				1068	}
				1069	if ( aExp == 0 ) {
				1070	++expDiff;
				1071	}
				1072	else {
				1073	aSig \|= 0x20000000;
				1074	}
				1075	shift32RightJamming( aSig, - expDiff, &aSig );
				1076	zExp = bExp;
				1077	}
				1078	else {
				1079	if ( aExp == 0xFF ) {
				1080	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1081	return a;
				1082	}
				1083	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1084	zSig = 0x40000000 + aSig + bSig;
				1085	zExp = aExp;
				1086	goto roundAndPack;
				1087	}
				1088	aSig \|= 0x20000000;
				1089	zSig = ( aSig + bSig )<<1;
				1090	--zExp;
				1091	if ( (sbits32) zSig < 0 ) {
				1092	zSig = aSig + bSig;
				1093	++zExp;
				1094	}
				1095	roundAndPack:
				1096	return roundAndPackFloat32( zSign, zExp, zSig );
				1097
				1098	}
				1099
				1100	/*
				1101	-------------------------------------------------------------------------------
				1102	Returns the result of subtracting the absolute values of the single-
				1103	precision floating-point values `a' and `b'. If `zSign' is true, the
				1104	difference is negated before being returned. `zSign' is ignored if the
				1105	result is a NaN. The subtraction is performed according to the IEC/IEEE
				1106	Standard for Binary Floating-point Arithmetic.
				1107	-------------------------------------------------------------------------------
				1108	*/
				1109	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
				1110	{
				1111	int16 aExp, bExp, zExp;
				1112	bits32 aSig, bSig, zSig;
				1113	int16 expDiff;
				1114
				1115	aSig = extractFloat32Frac( a );
				1116	aExp = extractFloat32Exp( a );
				1117	bSig = extractFloat32Frac( b );
				1118	bExp = extractFloat32Exp( b );
				1119	expDiff = aExp - bExp;
				1120	aSig <<= 7;
				1121	bSig <<= 7;
				1122	if ( 0 < expDiff ) goto aExpBigger;
				1123	if ( expDiff < 0 ) goto bExpBigger;
				1124	if ( aExp == 0xFF ) {
				1125	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1126	float_raise( float_flag_invalid );
				1127	return float32_default_nan;
				1128	}
				1129	if ( aExp == 0 ) {
				1130	aExp = 1;
				1131	bExp = 1;
				1132	}
				1133	if ( bSig < aSig ) goto aBigger;
				1134	if ( aSig < bSig ) goto bBigger;
				1135	return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
				1136	bExpBigger:
				1137	if ( bExp == 0xFF ) {
				1138	if ( bSig ) return propagateFloat32NaN( a, b );
				1139	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1140	}
				1141	if ( aExp == 0 ) {
				1142	++expDiff;
				1143	}
				1144	else {
				1145	aSig \|= 0x40000000;
				1146	}
				1147	shift32RightJamming( aSig, - expDiff, &aSig );
				1148	bSig \|= 0x40000000;
				1149	bBigger:
				1150	zSig = bSig - aSig;
				1151	zExp = bExp;
				1152	zSign ^= 1;
				1153	goto normalizeRoundAndPack;
				1154	aExpBigger:
				1155	if ( aExp == 0xFF ) {
				1156	if ( aSig ) return propagateFloat32NaN( a, b );
				1157	return a;
				1158	}
				1159	if ( bExp == 0 ) {
				1160	--expDiff;
				1161	}
				1162	else {
				1163	bSig \|= 0x40000000;
				1164	}
				1165	shift32RightJamming( bSig, expDiff, &bSig );
				1166	aSig \|= 0x40000000;
				1167	aBigger:
				1168	zSig = aSig - bSig;
				1169	zExp = aExp;
				1170	normalizeRoundAndPack:
				1171	--zExp;
				1172	return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
				1173
				1174	}
				1175
				1176	/*
				1177	-------------------------------------------------------------------------------
				1178	Returns the result of adding the single-precision floating-point values `a'
				1179	and `b'. The operation is performed according to the IEC/IEEE Standard for
				1180	Binary Floating-point Arithmetic.
				1181	-------------------------------------------------------------------------------
				1182	*/
				1183	float32 float32_add( float32 a, float32 b )
				1184	{
				1185	flag aSign, bSign;
				1186
				1187	aSign = extractFloat32Sign( a );
				1188	bSign = extractFloat32Sign( b );
				1189	if ( aSign == bSign ) {
				1190	return addFloat32Sigs( a, b, aSign );
				1191	}
				1192	else {
				1193	return subFloat32Sigs( a, b, aSign );
				1194	}
				1195
				1196	}
				1197
				1198	/*
				1199	-------------------------------------------------------------------------------
				1200	Returns the result of subtracting the single-precision floating-point values
				1201	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1202	for Binary Floating-point Arithmetic.
				1203	-------------------------------------------------------------------------------
				1204	*/
				1205	float32 float32_sub( float32 a, float32 b )
				1206	{
				1207	flag aSign, bSign;
				1208
				1209	aSign = extractFloat32Sign( a );
				1210	bSign = extractFloat32Sign( b );
				1211	if ( aSign == bSign ) {
				1212	return subFloat32Sigs( a, b, aSign );
				1213	}
				1214	else {
				1215	return addFloat32Sigs( a, b, aSign );
				1216	}
				1217
				1218	}
				1219
				1220	/*
				1221	-------------------------------------------------------------------------------
				1222	Returns the result of multiplying the single-precision floating-point values
				1223	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1224	for Binary Floating-point Arithmetic.
				1225	-------------------------------------------------------------------------------
				1226	*/
				1227	float32 float32_mul( float32 a, float32 b )
				1228	{
				1229	flag aSign, bSign, zSign;
				1230	int16 aExp, bExp, zExp;
				1231	bits32 aSig, bSig;
				1232	bits64 zSig64;
				1233	bits32 zSig;
				1234
				1235	aSig = extractFloat32Frac( a );
				1236	aExp = extractFloat32Exp( a );
				1237	aSign = extractFloat32Sign( a );
				1238	bSig = extractFloat32Frac( b );
				1239	bExp = extractFloat32Exp( b );
				1240	bSign = extractFloat32Sign( b );
				1241	zSign = aSign ^ bSign;
				1242	if ( aExp == 0xFF ) {
				1243	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1244	return propagateFloat32NaN( a, b );
				1245	}
				1246	if ( ( bExp \| bSig ) == 0 ) {
				1247	float_raise( float_flag_invalid );
				1248	return float32_default_nan;
				1249	}
				1250	return packFloat32( zSign, 0xFF, 0 );
				1251	}
				1252	if ( bExp == 0xFF ) {
				1253	if ( bSig ) return propagateFloat32NaN( a, b );
				1254	if ( ( aExp \| aSig ) == 0 ) {
				1255	float_raise( float_flag_invalid );
				1256	return float32_default_nan;
				1257	}
				1258	return packFloat32( zSign, 0xFF, 0 );
				1259	}
				1260	if ( aExp == 0 ) {
				1261	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1262	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1263	}
				1264	if ( bExp == 0 ) {
				1265	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1266	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1267	}
				1268	zExp = aExp + bExp - 0x7F;
				1269	aSig = ( aSig \| 0x00800000 )<<7;
				1270	bSig = ( bSig \| 0x00800000 )<<8;
				1271	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1272	zSig = zSig64;
				1273	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1274	zSig <<= 1;
				1275	--zExp;
				1276	}
				1277	return roundAndPackFloat32( zSign, zExp, zSig );
				1278
				1279	}
				1280
				1281	/*
				1282	-------------------------------------------------------------------------------
				1283	Returns the result of dividing the single-precision floating-point value `a'
				1284	by the corresponding value `b'. The operation is performed according to the
				1285	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1286	-------------------------------------------------------------------------------
				1287	*/
				1288	float32 float32_div( float32 a, float32 b )
				1289	{
				1290	flag aSign, bSign, zSign;
				1291	int16 aExp, bExp, zExp;
				1292	bits32 aSig, bSig, zSig;
				1293
				1294	aSig = extractFloat32Frac( a );
				1295	aExp = extractFloat32Exp( a );
				1296	aSign = extractFloat32Sign( a );
				1297	bSig = extractFloat32Frac( b );
				1298	bExp = extractFloat32Exp( b );
				1299	bSign = extractFloat32Sign( b );
				1300	zSign = aSign ^ bSign;
				1301	if ( aExp == 0xFF ) {
				1302	if ( aSig ) return propagateFloat32NaN( a, b );
				1303	if ( bExp == 0xFF ) {
				1304	if ( bSig ) return propagateFloat32NaN( a, b );
				1305	float_raise( float_flag_invalid );
				1306	return float32_default_nan;
				1307	}
				1308	return packFloat32( zSign, 0xFF, 0 );
				1309	}
				1310	if ( bExp == 0xFF ) {
				1311	if ( bSig ) return propagateFloat32NaN( a, b );
				1312	return packFloat32( zSign, 0, 0 );
				1313	}
				1314	if ( bExp == 0 ) {
				1315	if ( bSig == 0 ) {
				1316	if ( ( aExp \| aSig ) == 0 ) {
				1317	float_raise( float_flag_invalid );
				1318	return float32_default_nan;
				1319	}
				1320	float_raise( float_flag_divbyzero );
				1321	return packFloat32( zSign, 0xFF, 0 );
				1322	}
				1323	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1324	}
				1325	if ( aExp == 0 ) {
				1326	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1327	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1328	}
				1329	zExp = aExp - bExp + 0x7D;
				1330	aSig = ( aSig \| 0x00800000 )<<7;
				1331	bSig = ( bSig \| 0x00800000 )<<8;
				1332	if ( bSig <= ( aSig + aSig ) ) {
				1333	aSig >>= 1;
				1334	++zExp;
				1335	}
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	1336	{
				1337	bits64 tmp = ( (bits64) aSig )<<32;
				1338	do_div( tmp, bSig );
				1339	zSig = tmp;
				1340	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1341	if ( ( zSig & 0x3F ) == 0 ) {
				1342	zSig \|= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
				1343	}
				1344	return roundAndPackFloat32( zSign, zExp, zSig );
				1345
				1346	}
				1347
				1348	/*
				1349	-------------------------------------------------------------------------------
				1350	Returns the remainder of the single-precision floating-point value `a'
				1351	with respect to the corresponding value `b'. The operation is performed
				1352	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1353	-------------------------------------------------------------------------------
				1354	*/
				1355	float32 float32_rem( float32 a, float32 b )
				1356	{
				1357	flag aSign, bSign, zSign;
				1358	int16 aExp, bExp, expDiff;
				1359	bits32 aSig, bSig;
				1360	bits32 q;
				1361	bits64 aSig64, bSig64, q64;
				1362	bits32 alternateASig;
				1363	sbits32 sigMean;
				1364
				1365	aSig = extractFloat32Frac( a );
				1366	aExp = extractFloat32Exp( a );
				1367	aSign = extractFloat32Sign( a );
				1368	bSig = extractFloat32Frac( b );
				1369	bExp = extractFloat32Exp( b );
				1370	bSign = extractFloat32Sign( b );
				1371	if ( aExp == 0xFF ) {
				1372	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1373	return propagateFloat32NaN( a, b );
				1374	}
				1375	float_raise( float_flag_invalid );
				1376	return float32_default_nan;
				1377	}
				1378	if ( bExp == 0xFF ) {
				1379	if ( bSig ) return propagateFloat32NaN( a, b );
				1380	return a;
				1381	}
				1382	if ( bExp == 0 ) {
				1383	if ( bSig == 0 ) {
				1384	float_raise( float_flag_invalid );
				1385	return float32_default_nan;
				1386	}
				1387	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1388	}
				1389	if ( aExp == 0 ) {
				1390	if ( aSig == 0 ) return a;
				1391	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1392	}
				1393	expDiff = aExp - bExp;
				1394	aSig \|= 0x00800000;
				1395	bSig \|= 0x00800000;
				1396	if ( expDiff < 32 ) {
				1397	aSig <<= 8;
				1398	bSig <<= 8;
				1399	if ( expDiff < 0 ) {
				1400	if ( expDiff < -1 ) return a;
				1401	aSig >>= 1;
				1402	}
				1403	q = ( bSig <= aSig );
				1404	if ( q ) aSig -= bSig;
				1405	if ( 0 < expDiff ) {
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	1406	bits64 tmp = ( (bits64) aSig )<<32;
				1407	do_div( tmp, bSig );
				1408	q = tmp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1409	q >>= 32 - expDiff;
				1410	bSig >>= 2;
				1411	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1412	}
				1413	else {
				1414	aSig >>= 2;
				1415	bSig >>= 2;
				1416	}
				1417	}
				1418	else {
				1419	if ( bSig <= aSig ) aSig -= bSig;
				1420	aSig64 = ( (bits64) aSig )<<40;
				1421	bSig64 = ( (bits64) bSig )<<40;
				1422	expDiff -= 64;
				1423	while ( 0 < expDiff ) {
				1424	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1425	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1426	aSig64 = - ( ( bSig * q64 )<<38 );
				1427	expDiff -= 62;
				1428	}
				1429	expDiff += 64;
				1430	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1431	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1432	q = q64>>( 64 - expDiff );
				1433	bSig <<= 6;
				1434	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1435	}
				1436	do {
				1437	alternateASig = aSig;
				1438	++q;
				1439	aSig -= bSig;
				1440	} while ( 0 <= (sbits32) aSig );
				1441	sigMean = aSig + alternateASig;
				1442	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1443	aSig = alternateASig;
				1444	}
				1445	zSign = ( (sbits32) aSig < 0 );
				1446	if ( zSign ) aSig = - aSig;
				1447	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
				1448
				1449	}
				1450
				1451	/*
				1452	-------------------------------------------------------------------------------
				1453	Returns the square root of the single-precision floating-point value `a'.
				1454	The operation is performed according to the IEC/IEEE Standard for Binary
				1455	Floating-point Arithmetic.
				1456	-------------------------------------------------------------------------------
				1457	*/
				1458	float32 float32_sqrt( float32 a )
				1459	{
				1460	flag aSign;
				1461	int16 aExp, zExp;
				1462	bits32 aSig, zSig;
				1463	bits64 rem, term;
				1464
				1465	aSig = extractFloat32Frac( a );
				1466	aExp = extractFloat32Exp( a );
				1467	aSign = extractFloat32Sign( a );
				1468	if ( aExp == 0xFF ) {
				1469	if ( aSig ) return propagateFloat32NaN( a, 0 );
				1470	if ( ! aSign ) return a;
				1471	float_raise( float_flag_invalid );
				1472	return float32_default_nan;
				1473	}
				1474	if ( aSign ) {
				1475	if ( ( aExp \| aSig ) == 0 ) return a;
				1476	float_raise( float_flag_invalid );
				1477	return float32_default_nan;
				1478	}
				1479	if ( aExp == 0 ) {
				1480	if ( aSig == 0 ) return 0;
				1481	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1482	}
				1483	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				1484	aSig = ( aSig \| 0x00800000 )<<8;
				1485	zSig = estimateSqrt32( aExp, aSig ) + 2;
				1486	if ( ( zSig & 0x7F ) <= 5 ) {
				1487	if ( zSig < 2 ) {
				1488	zSig = 0xFFFFFFFF;
				1489	}
				1490	else {
				1491	aSig >>= aExp & 1;
				1492	term = ( (bits64) zSig ) * zSig;
				1493	rem = ( ( (bits64) aSig )<<32 ) - term;
				1494	while ( (sbits64) rem < 0 ) {
				1495	--zSig;
				1496	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				1497	}
				1498	zSig \|= ( rem != 0 );
				1499	}
				1500	}
				1501	shift32RightJamming( zSig, 1, &zSig );
				1502	return roundAndPackFloat32( 0, zExp, zSig );
				1503
				1504	}
				1505
				1506	/*
				1507	-------------------------------------------------------------------------------
				1508	Returns 1 if the single-precision floating-point value `a' is equal to the
				1509	corresponding value `b', and 0 otherwise. The comparison is performed
				1510	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1511	-------------------------------------------------------------------------------
				1512	*/
				1513	flag float32_eq( float32 a, float32 b )
				1514	{
				1515
				1516	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1517	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1518	) {
				1519	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1520	float_raise( float_flag_invalid );
				1521	}
				1522	return 0;
				1523	}
				1524	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1525
				1526	}
				1527
				1528	/*
				1529	-------------------------------------------------------------------------------
				1530	Returns 1 if the single-precision floating-point value `a' is less than or
				1531	equal to the corresponding value `b', and 0 otherwise. The comparison is
				1532	performed according to the IEC/IEEE Standard for Binary Floating-point
				1533	Arithmetic.
				1534	-------------------------------------------------------------------------------
				1535	*/
				1536	flag float32_le( float32 a, float32 b )
				1537	{
				1538	flag aSign, bSign;
				1539
				1540	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1541	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1542	) {
				1543	float_raise( float_flag_invalid );
				1544	return 0;
				1545	}
				1546	aSign = extractFloat32Sign( a );
				1547	bSign = extractFloat32Sign( b );
				1548	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1549	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1550
				1551	}
				1552
				1553	/*
				1554	-------------------------------------------------------------------------------
				1555	Returns 1 if the single-precision floating-point value `a' is less than
				1556	the corresponding value `b', and 0 otherwise. The comparison is performed
				1557	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1558	-------------------------------------------------------------------------------
				1559	*/
				1560	flag float32_lt( float32 a, float32 b )
				1561	{
				1562	flag aSign, bSign;
				1563
				1564	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1565	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1566	) {
				1567	float_raise( float_flag_invalid );
				1568	return 0;
				1569	}
				1570	aSign = extractFloat32Sign( a );
				1571	bSign = extractFloat32Sign( b );
				1572	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1573	return ( a != b ) && ( aSign ^ ( a < b ) );
				1574
				1575	}
				1576
				1577	/*
				1578	-------------------------------------------------------------------------------
				1579	Returns 1 if the single-precision floating-point value `a' is equal to the
				1580	corresponding value `b', and 0 otherwise. The invalid exception is raised
				1581	if either operand is a NaN. Otherwise, the comparison is performed
				1582	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1583	-------------------------------------------------------------------------------
				1584	*/
				1585	flag float32_eq_signaling( float32 a, float32 b )
				1586	{
				1587
				1588	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1589	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1590	) {
				1591	float_raise( float_flag_invalid );
				1592	return 0;
				1593	}
				1594	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1595
				1596	}
				1597
				1598	/*
				1599	-------------------------------------------------------------------------------
				1600	Returns 1 if the single-precision floating-point value `a' is less than or
				1601	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				1602	cause an exception. Otherwise, the comparison is performed according to the
				1603	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1604	-------------------------------------------------------------------------------
				1605	*/
				1606	flag float32_le_quiet( float32 a, float32 b )
				1607	{
				1608	flag aSign, bSign;
				1609	//int16 aExp, bExp;
				1610
				1611	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1612	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1613	) {
				1614	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1615	float_raise( float_flag_invalid );
				1616	}
				1617	return 0;
				1618	}
				1619	aSign = extractFloat32Sign( a );
				1620	bSign = extractFloat32Sign( b );
				1621	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1622	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1623
				1624	}
				1625
				1626	/*
				1627	-------------------------------------------------------------------------------
				1628	Returns 1 if the single-precision floating-point value `a' is less than
				1629	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				1630	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				1631	Standard for Binary Floating-point Arithmetic.
				1632	-------------------------------------------------------------------------------
				1633	*/
				1634	flag float32_lt_quiet( float32 a, float32 b )
				1635	{
				1636	flag aSign, bSign;
				1637
				1638	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1639	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1640	) {
				1641	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1642	float_raise( float_flag_invalid );
				1643	}
				1644	return 0;
				1645	}
				1646	aSign = extractFloat32Sign( a );
				1647	bSign = extractFloat32Sign( b );
				1648	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1649	return ( a != b ) && ( aSign ^ ( a < b ) );
				1650
				1651	}
				1652
				1653	/*
				1654	-------------------------------------------------------------------------------
				1655	Returns the result of converting the double-precision floating-point value
				1656	`a' to the 32-bit two's complement integer format. The conversion is
				1657	performed according to the IEC/IEEE Standard for Binary Floating-point
				1658	Arithmetic---which means in particular that the conversion is rounded
				1659	according to the current rounding mode. If `a' is a NaN, the largest
				1660	positive integer is returned. Otherwise, if the conversion overflows, the
				1661	largest integer with the same sign as `a' is returned.
				1662	-------------------------------------------------------------------------------
				1663	*/
				1664	int32 float64_to_int32( float64 a )
				1665	{
				1666	flag aSign;
				1667	int16 aExp, shiftCount;
				1668	bits64 aSig;
				1669
				1670	aSig = extractFloat64Frac( a );
				1671	aExp = extractFloat64Exp( a );
				1672	aSign = extractFloat64Sign( a );
				1673	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1674	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1675	shiftCount = 0x42C - aExp;
				1676	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1677	return roundAndPackInt32( aSign, aSig );
				1678
				1679	}
				1680
				1681	/*
				1682	-------------------------------------------------------------------------------
				1683	Returns the result of converting the double-precision floating-point value
				1684	`a' to the 32-bit two's complement integer format. The conversion is
				1685	performed according to the IEC/IEEE Standard for Binary Floating-point
				1686	Arithmetic, except that the conversion is always rounded toward zero. If
				1687	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1688	conversion overflows, the largest integer with the same sign as `a' is
				1689	returned.
				1690	-------------------------------------------------------------------------------
				1691	*/
				1692	int32 float64_to_int32_round_to_zero( float64 a )
				1693	{
				1694	flag aSign;
				1695	int16 aExp, shiftCount;
				1696	bits64 aSig, savedASig;
				1697	int32 z;
				1698
				1699	aSig = extractFloat64Frac( a );
				1700	aExp = extractFloat64Exp( a );
				1701	aSign = extractFloat64Sign( a );
				1702	shiftCount = 0x433 - aExp;
				1703	if ( shiftCount < 21 ) {
				1704	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1705	goto invalid;
				1706	}
				1707	else if ( 52 < shiftCount ) {
				1708	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1709	return 0;
				1710	}
				1711	aSig \|= LIT64( 0x0010000000000000 );
				1712	savedASig = aSig;
				1713	aSig >>= shiftCount;
				1714	z = aSig;
				1715	if ( aSign ) z = - z;
				1716	if ( ( z < 0 ) ^ aSign ) {
				1717	invalid:
				1718	float_exception_flags \|= float_flag_invalid;
				1719	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1720	}
				1721	if ( ( aSig<<shiftCount ) != savedASig ) {
				1722	float_exception_flags \|= float_flag_inexact;
				1723	}
				1724	return z;
				1725
				1726	}
				1727
				1728	/*
				1729	-------------------------------------------------------------------------------
				1730	Returns the result of converting the double-precision floating-point value
				1731	`a' to the 32-bit two's complement unsigned integer format. The conversion
				1732	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1733	Arithmetic---which means in particular that the conversion is rounded
				1734	according to the current rounding mode. If `a' is a NaN, the largest
				1735	positive integer is returned. Otherwise, if the conversion overflows, the
				1736	largest positive integer is returned.
				1737	-------------------------------------------------------------------------------
				1738	*/
				1739	int32 float64_to_uint32( float64 a )
				1740	{
				1741	flag aSign;
				1742	int16 aExp, shiftCount;
				1743	bits64 aSig;
				1744
				1745	aSig = extractFloat64Frac( a );
				1746	aExp = extractFloat64Exp( a );
				1747	aSign = 0; //extractFloat64Sign( a );
				1748	//if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1749	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1750	shiftCount = 0x42C - aExp;
				1751	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1752	return roundAndPackInt32( aSign, aSig );
				1753	}
				1754
				1755	/*
				1756	-------------------------------------------------------------------------------
				1757	Returns the result of converting the double-precision floating-point value
				1758	`a' to the 32-bit two's complement integer format. The conversion is
				1759	performed according to the IEC/IEEE Standard for Binary Floating-point
				1760	Arithmetic, except that the conversion is always rounded toward zero. If
				1761	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1762	conversion overflows, the largest positive integer is returned.
				1763	-------------------------------------------------------------------------------
				1764	*/
				1765	int32 float64_to_uint32_round_to_zero( float64 a )
				1766	{
				1767	flag aSign;
				1768	int16 aExp, shiftCount;
				1769	bits64 aSig, savedASig;
				1770	int32 z;
				1771
				1772	aSig = extractFloat64Frac( a );
				1773	aExp = extractFloat64Exp( a );
				1774	aSign = extractFloat64Sign( a );
				1775	shiftCount = 0x433 - aExp;
				1776	if ( shiftCount < 21 ) {
				1777	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1778	goto invalid;
				1779	}
				1780	else if ( 52 < shiftCount ) {
				1781	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1782	return 0;
				1783	}
				1784	aSig \|= LIT64( 0x0010000000000000 );
				1785	savedASig = aSig;
				1786	aSig >>= shiftCount;
				1787	z = aSig;
				1788	if ( aSign ) z = - z;
				1789	if ( ( z < 0 ) ^ aSign ) {
				1790	invalid:
				1791	float_exception_flags \|= float_flag_invalid;
				1792	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1793	}
				1794	if ( ( aSig<<shiftCount ) != savedASig ) {
				1795	float_exception_flags \|= float_flag_inexact;
				1796	}
				1797	return z;
				1798	}
				1799
				1800	/*
				1801	-------------------------------------------------------------------------------
				1802	Returns the result of converting the double-precision floating-point value
				1803	`a' to the single-precision floating-point format. The conversion is
				1804	performed according to the IEC/IEEE Standard for Binary Floating-point
				1805	Arithmetic.
				1806	-------------------------------------------------------------------------------
				1807	*/
				1808	float32 float64_to_float32( float64 a )
				1809	{
				1810	flag aSign;
				1811	int16 aExp;
				1812	bits64 aSig;
				1813	bits32 zSig;
				1814
				1815	aSig = extractFloat64Frac( a );
				1816	aExp = extractFloat64Exp( a );
				1817	aSign = extractFloat64Sign( a );
				1818	if ( aExp == 0x7FF ) {
				1819	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
				1820	return packFloat32( aSign, 0xFF, 0 );
				1821	}
				1822	shift64RightJamming( aSig, 22, &aSig );
				1823	zSig = aSig;
				1824	if ( aExp \|\| zSig ) {
				1825	zSig \|= 0x40000000;
				1826	aExp -= 0x381;
				1827	}
				1828	return roundAndPackFloat32( aSign, aExp, zSig );
				1829
				1830	}
				1831
				1832	#ifdef FLOATX80
				1833
				1834	/*
				1835	-------------------------------------------------------------------------------
				1836	Returns the result of converting the double-precision floating-point value
				1837	`a' to the extended double-precision floating-point format. The conversion
				1838	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1839	Arithmetic.
				1840	-------------------------------------------------------------------------------
				1841	*/
				1842	floatx80 float64_to_floatx80( float64 a )
				1843	{
				1844	flag aSign;
				1845	int16 aExp;
				1846	bits64 aSig;
				1847
				1848	aSig = extractFloat64Frac( a );
				1849	aExp = extractFloat64Exp( a );
				1850	aSign = extractFloat64Sign( a );
				1851	if ( aExp == 0x7FF ) {
				1852	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
				1853	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1854	}
				1855	if ( aExp == 0 ) {
				1856	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1857	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				1858	}
				1859	return
				1860	packFloatx80(
				1861	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				1862
				1863	}
				1864
				1865	#endif
				1866
				1867	/*
				1868	-------------------------------------------------------------------------------
				1869	Rounds the double-precision floating-point value `a' to an integer, and
				1870	returns the result as a double-precision floating-point value. The
				1871	operation is performed according to the IEC/IEEE Standard for Binary
				1872	Floating-point Arithmetic.
				1873	-------------------------------------------------------------------------------
				1874	*/
				1875	float64 float64_round_to_int( float64 a )
				1876	{
				1877	flag aSign;
				1878	int16 aExp;
				1879	bits64 lastBitMask, roundBitsMask;
				1880	int8 roundingMode;
				1881	float64 z;
				1882
				1883	aExp = extractFloat64Exp( a );
				1884	if ( 0x433 <= aExp ) {
				1885	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				1886	return propagateFloat64NaN( a, a );
				1887	}
				1888	return a;
				1889	}
				1890	if ( aExp <= 0x3FE ) {
				1891	if ( (bits64) ( a<<1 ) == 0 ) return a;
				1892	float_exception_flags \|= float_flag_inexact;
				1893	aSign = extractFloat64Sign( a );
				1894	switch ( float_rounding_mode ) {
				1895	case float_round_nearest_even:
				1896	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				1897	return packFloat64( aSign, 0x3FF, 0 );
				1898	}
				1899	break;
				1900	case float_round_down:
				1901	return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
				1902	case float_round_up:
				1903	return
				1904	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
				1905	}
				1906	return packFloat64( aSign, 0, 0 );
				1907	}
				1908	lastBitMask = 1;
				1909	lastBitMask <<= 0x433 - aExp;
				1910	roundBitsMask = lastBitMask - 1;
				1911	z = a;
				1912	roundingMode = float_rounding_mode;
				1913	if ( roundingMode == float_round_nearest_even ) {
				1914	z += lastBitMask>>1;
				1915	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1916	}
				1917	else if ( roundingMode != float_round_to_zero ) {
				1918	if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1919	z += roundBitsMask;
				1920	}
				1921	}
				1922	z &= ~ roundBitsMask;
				1923	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1924	return z;
				1925
				1926	}
				1927
				1928	/*
				1929	-------------------------------------------------------------------------------
				1930	Returns the result of adding the absolute values of the double-precision
				1931	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1932	before being returned. `zSign' is ignored if the result is a NaN. The
				1933	addition is performed according to the IEC/IEEE Standard for Binary
				1934	Floating-point Arithmetic.
				1935	-------------------------------------------------------------------------------
				1936	*/
				1937	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
				1938	{
				1939	int16 aExp, bExp, zExp;
				1940	bits64 aSig, bSig, zSig;
				1941	int16 expDiff;
				1942
				1943	aSig = extractFloat64Frac( a );
				1944	aExp = extractFloat64Exp( a );
				1945	bSig = extractFloat64Frac( b );
				1946	bExp = extractFloat64Exp( b );
				1947	expDiff = aExp - bExp;
				1948	aSig <<= 9;
				1949	bSig <<= 9;
				1950	if ( 0 < expDiff ) {
				1951	if ( aExp == 0x7FF ) {
				1952	if ( aSig ) return propagateFloat64NaN( a, b );
				1953	return a;
				1954	}
				1955	if ( bExp == 0 ) {
				1956	--expDiff;
				1957	}
				1958	else {
				1959	bSig \|= LIT64( 0x2000000000000000 );
				1960	}
				1961	shift64RightJamming( bSig, expDiff, &bSig );
				1962	zExp = aExp;
				1963	}
				1964	else if ( expDiff < 0 ) {
				1965	if ( bExp == 0x7FF ) {
				1966	if ( bSig ) return propagateFloat64NaN( a, b );
				1967	return packFloat64( zSign, 0x7FF, 0 );
				1968	}
				1969	if ( aExp == 0 ) {
				1970	++expDiff;
				1971	}
				1972	else {
				1973	aSig \|= LIT64( 0x2000000000000000 );
				1974	}
				1975	shift64RightJamming( aSig, - expDiff, &aSig );
				1976	zExp = bExp;
				1977	}
				1978	else {
				1979	if ( aExp == 0x7FF ) {
				1980	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				1981	return a;
				1982	}
				1983	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				1984	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				1985	zExp = aExp;
				1986	goto roundAndPack;
				1987	}
				1988	aSig \|= LIT64( 0x2000000000000000 );
				1989	zSig = ( aSig + bSig )<<1;
				1990	--zExp;
				1991	if ( (sbits64) zSig < 0 ) {
				1992	zSig = aSig + bSig;
				1993	++zExp;
				1994	}
				1995	roundAndPack:
				1996	return roundAndPackFloat64( zSign, zExp, zSig );
				1997
				1998	}
				1999
				2000	/*
				2001	-------------------------------------------------------------------------------
				2002	Returns the result of subtracting the absolute values of the double-
				2003	precision floating-point values `a' and `b'. If `zSign' is true, the
				2004	difference is negated before being returned. `zSign' is ignored if the
				2005	result is a NaN. The subtraction is performed according to the IEC/IEEE
				2006	Standard for Binary Floating-point Arithmetic.
				2007	-------------------------------------------------------------------------------
				2008	*/
				2009	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
				2010	{
					/*
					 * Computes |a| - |b|.  zSign is the sign given to the result when
					 * |a| is the larger magnitude; it is flipped when |b| is larger.
					 */
				2011	int16 aExp, bExp, zExp;
				2012	bits64 aSig, bSig, zSig;
				2013	int16 expDiff;
				2014
				2015	aSig = extractFloat64Frac( a );
				2016	aExp = extractFloat64Exp( a );
				2017	bSig = extractFloat64Frac( b );
				2018	bExp = extractFloat64Exp( b );
				2019	expDiff = aExp - bExp;
					/* Shift by 10 so the leading bit OR-ed in below (0x4000...) is bit 62. */
				2020	aSig <<= 10;
				2021	bSig <<= 10;
				2022	if ( 0 < expDiff ) goto aExpBigger;
				2023	if ( expDiff < 0 ) goto bExpBigger;
					/* Equal exponents from here to the bExpBigger label. */
				2024	if ( aExp == 0x7FF ) {
					/* NaN operand: propagate.  Otherwise inf - inf: invalid. */
				2025	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				2026	float_raise( float_flag_invalid );
				2027	return float64_default_nan;
				2028	}
					/* Both exponent fields zero: use exponent 1 for the result path. */
				2029	if ( aExp == 0 ) {
				2030	aExp = 1;
				2031	bExp = 1;
				2032	}
				2033	if ( bSig < aSig ) goto aBigger;
				2034	if ( aSig < bSig ) goto bBigger;
					/* Exact cancellation: zero, negative only in round-down mode. */
				2035	return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
				2036	bExpBigger:
				2037	if ( bExp == 0x7FF ) {
					/* b is a NaN (propagate) or an infinity (result takes the flipped sign). */
				2038	if ( bSig ) return propagateFloat64NaN( a, b );
				2039	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2040	}
					/* expDiff is negative here; a zero aExp shortens the shift by one */
					/* instead of receiving a leading bit. */
				2041	if ( aExp == 0 ) {
				2042	++expDiff;
				2043	}
				2044	else {
				2045	aSig \|= LIT64( 0x4000000000000000 );
				2046	}
				2047	shift64RightJamming( aSig, - expDiff, &aSig );
				2048	bSig \|= LIT64( 0x4000000000000000 );
				2049	bBigger:
					/* |b| > |a|: result is b - a with the sign flipped. */
				2050	zSig = bSig - aSig;
				2051	zExp = bExp;
				2052	zSign ^= 1;
				2053	goto normalizeRoundAndPack;
				2054	aExpBigger:
				2055	if ( aExp == 0x7FF ) {
					/* a is a NaN (propagate) or an infinity (returned unchanged). */
				2056	if ( aSig ) return propagateFloat64NaN( a, b );
				2057	return a;
				2058	}
					/* Mirror of the bExpBigger alignment, applied to b. */
				2059	if ( bExp == 0 ) {
				2060	--expDiff;
				2061	}
				2062	else {
				2063	bSig \|= LIT64( 0x4000000000000000 );
				2064	}
				2065	shift64RightJamming( bSig, expDiff, &bSig );
				2066	aSig \|= LIT64( 0x4000000000000000 );
				2067	aBigger:
					/* |a| > |b|: result is a - b with sign zSign. */
				2068	zSig = aSig - bSig;
				2069	zExp = aExp;
				2070	normalizeRoundAndPack:
					/* Exponent is lowered by one before normalization and rounding. */
				2071	--zExp;
				2072	return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
				2073
				2074	}
				2075
				2076	/*
				2077	-------------------------------------------------------------------------------
				2078	Returns the result of adding the double-precision floating-point values `a'
				2079	and `b'. The operation is performed according to the IEC/IEEE Standard for
				2080	Binary Floating-point Arithmetic.
				2081	-------------------------------------------------------------------------------
				2082	*/
				2083	float64 float64_add( float64 a, float64 b )
				2084	{
				2085	flag aSign, bSign;
				2086
				2087	aSign = extractFloat64Sign( a );
				2088	bSign = extractFloat64Sign( b );
				2089	if ( aSign == bSign ) {
				2090	return addFloat64Sigs( a, b, aSign );
				2091	}
				2092	else {
				2093	return subFloat64Sigs( a, b, aSign );
				2094	}
				2095
				2096	}
				2097
				2098	/*
				2099	-------------------------------------------------------------------------------
				2100	Returns the result of subtracting the double-precision floating-point values
				2101	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2102	for Binary Floating-point Arithmetic.
				2103	-------------------------------------------------------------------------------
				2104	*/
				2105	float64 float64_sub( float64 a, float64 b )
				2106	{
				2107	flag aSign, bSign;
				2108
				2109	aSign = extractFloat64Sign( a );
				2110	bSign = extractFloat64Sign( b );
				2111	if ( aSign == bSign ) {
				2112	return subFloat64Sigs( a, b, aSign );
				2113	}
				2114	else {
				2115	return addFloat64Sigs( a, b, aSign );
				2116	}
				2117
				2118	}
				2119
				2120	/*
				2121	-------------------------------------------------------------------------------
				2122	Returns the result of multiplying the double-precision floating-point values
				2123	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2124	for Binary Floating-point Arithmetic.
				2125	-------------------------------------------------------------------------------
				2126	*/
				2127	float64 float64_mul( float64 a, float64 b )
				2128	{
				2129	flag aSign, bSign, zSign;
				2130	int16 aExp, bExp, zExp;
				2131	bits64 aSig, bSig, zSig0, zSig1;
				2132
				2133	aSig = extractFloat64Frac( a );
				2134	aExp = extractFloat64Exp( a );
				2135	aSign = extractFloat64Sign( a );
				2136	bSig = extractFloat64Frac( b );
				2137	bExp = extractFloat64Exp( b );
				2138	bSign = extractFloat64Sign( b );
				2139	zSign = aSign ^ bSign;
				2140	if ( aExp == 0x7FF ) {
				2141	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2142	return propagateFloat64NaN( a, b );
				2143	}
				2144	if ( ( bExp \| bSig ) == 0 ) {
				2145	float_raise( float_flag_invalid );
				2146	return float64_default_nan;
				2147	}
				2148	return packFloat64( zSign, 0x7FF, 0 );
				2149	}
				2150	if ( bExp == 0x7FF ) {
				2151	if ( bSig ) return propagateFloat64NaN( a, b );
				2152	if ( ( aExp \| aSig ) == 0 ) {
				2153	float_raise( float_flag_invalid );
				2154	return float64_default_nan;
				2155	}
				2156	return packFloat64( zSign, 0x7FF, 0 );
				2157	}
				2158	if ( aExp == 0 ) {
				2159	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2160	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2161	}
				2162	if ( bExp == 0 ) {
				2163	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2164	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2165	}
				2166	zExp = aExp + bExp - 0x3FF;
				2167	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2168	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2169	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2170	zSig0 \|= ( zSig1 != 0 );
				2171	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2172	zSig0 <<= 1;
				2173	--zExp;
				2174	}
				2175	return roundAndPackFloat64( zSign, zExp, zSig0 );
				2176
				2177	}
				2178
				2179	/*
				2180	-------------------------------------------------------------------------------
				2181	Returns the result of dividing the double-precision floating-point value `a'
				2182	by the corresponding value `b'. The operation is performed according to
				2183	the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2184	-------------------------------------------------------------------------------
				2185	*/
				2186	float64 float64_div( float64 a, float64 b )
				2187	{
					/*
					 * Computes a / b.  Special operands are handled first; the general
					 * case estimates a 64-bit quotient and corrects it only when needed.
					 */
				2188	flag aSign, bSign, zSign;
				2189	int16 aExp, bExp, zExp;
				2190	bits64 aSig, bSig, zSig;
				2191	bits64 rem0, rem1;
				2192	bits64 term0, term1;
				2193
				2194	aSig = extractFloat64Frac( a );
				2195	aExp = extractFloat64Exp( a );
				2196	aSign = extractFloat64Sign( a );
				2197	bSig = extractFloat64Frac( b );
				2198	bExp = extractFloat64Exp( b );
				2199	bSign = extractFloat64Sign( b );
					/* Result sign is the exclusive-or of the operand signs. */
				2200	zSign = aSign ^ bSign;
				2201	if ( aExp == 0x7FF ) {
					/* a is a NaN or an infinity. */
				2202	if ( aSig ) return propagateFloat64NaN( a, b );
				2203	if ( bExp == 0x7FF ) {
					/* b NaN: propagate.  Otherwise inf / inf: invalid. */
				2204	if ( bSig ) return propagateFloat64NaN( a, b );
				2205	float_raise( float_flag_invalid );
				2206	return float64_default_nan;
				2207	}
					/* inf / finite: infinity with the result sign. */
				2208	return packFloat64( zSign, 0x7FF, 0 );
				2209	}
				2210	if ( bExp == 0x7FF ) {
					/* b NaN: propagate.  finite / inf: signed zero. */
				2211	if ( bSig ) return propagateFloat64NaN( a, b );
				2212	return packFloat64( zSign, 0, 0 );
				2213	}
				2214	if ( bExp == 0 ) {
				2215	if ( bSig == 0 ) {
					/* Divisor is zero: 0 / 0 is invalid, anything else is divide-by-zero. */
				2216	if ( ( aExp \| aSig ) == 0 ) {
				2217	float_raise( float_flag_invalid );
				2218	return float64_default_nan;
				2219	}
				2220	float_raise( float_flag_divbyzero );
				2221	return packFloat64( zSign, 0x7FF, 0 );
				2222	}
				2223	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2224	}
				2225	if ( aExp == 0 ) {
					/* Zero dividend gives a signed zero; a subnormal one is normalized. */
				2226	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2227	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2228	}
				2229	zExp = aExp - bExp + 0x3FD;
					/* Make the leading bits explicit; b is shifted one position further than a. */
				2230	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2231	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
					/* Halve a and bump the exponent when 2*a >= b (presumably so the */
					/* quotient estimate fits in 64 bits -- confirm against estimateDiv128To64). */
				2232	if ( bSig <= ( aSig + aSig ) ) {
				2233	aSig >>= 1;
				2234	++zExp;
				2235	}
				2236	zSig = estimateDiv128To64( aSig, 0, bSig );
					/* Only when the low 9 bits are 0, 1 or 2 is the exact remainder */
					/* computed to correct the estimate. */
				2237	if ( ( zSig & 0x1FF ) <= 2 ) {
				2238	mul64To128( bSig, zSig, &term0, &term1 );
				2239	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
					/* Estimate too large: step it down until the remainder is non-negative. */
				2240	while ( (sbits64) rem0 < 0 ) {
				2241	--zSig;
				2242	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2243	}
					/* Record a non-zero low remainder word in bit 0 of the quotient. */
				2244	zSig \|= ( rem1 != 0 );
				2245	}
				2246	return roundAndPackFloat64( zSign, zExp, zSig );
				2247
				2248	}
				2249
				2250	/*
				2251	-------------------------------------------------------------------------------
				2252	Returns the remainder of the double-precision floating-point value `a'
				2253	with respect to the corresponding value `b'. The operation is performed
				2254	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2255	-------------------------------------------------------------------------------
				2256	*/
				2257	float64 float64_rem( float64 a, float64 b )
				2258	{
					/*
					 * Computes the remainder of a with respect to b.  After the special
					 * cases, the quotient is developed in chunks and the final candidate
					 * remainder is chosen from the two values straddling zero.
					 */
				2259	flag aSign, bSign, zSign;
				2260	int16 aExp, bExp, expDiff;
				2261	bits64 aSig, bSig;
				2262	bits64 q, alternateASig;
				2263	sbits64 sigMean;
				2264
				2265	aSig = extractFloat64Frac( a );
				2266	aExp = extractFloat64Exp( a );
				2267	aSign = extractFloat64Sign( a );
				2268	bSig = extractFloat64Frac( b );
				2269	bExp = extractFloat64Exp( b );
					/* bSign is extracted but not read again in this function. */
				2270	bSign = extractFloat64Sign( b );
				2271	if ( aExp == 0x7FF ) {
					/* Any NaN operand: propagate.  Otherwise a is infinite: invalid. */
				2272	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2273	return propagateFloat64NaN( a, b );
				2274	}
				2275	float_raise( float_flag_invalid );
				2276	return float64_default_nan;
				2277	}
				2278	if ( bExp == 0x7FF ) {
					/* b NaN: propagate.  b infinite: a is returned unchanged. */
				2279	if ( bSig ) return propagateFloat64NaN( a, b );
				2280	return a;
				2281	}
				2282	if ( bExp == 0 ) {
					/* b is zero: invalid.  b subnormal: normalize. */
				2283	if ( bSig == 0 ) {
				2284	float_raise( float_flag_invalid );
				2285	return float64_default_nan;
				2286	}
				2287	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2288	}
				2289	if ( aExp == 0 ) {
					/* a is zero: returned unchanged.  a subnormal: normalize. */
				2290	if ( aSig == 0 ) return a;
				2291	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2292	}
				2293	expDiff = aExp - bExp;
					/* Make the leading bits explicit at bit 63 of both significands. */
				2294	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2295	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2296	if ( expDiff < 0 ) {
					/* Exponent of a more than one below that of b: a is returned as is. */
				2297	if ( expDiff < -1 ) return a;
				2298	aSig >>= 1;
				2299	}
					/* First quotient bit. */
				2300	q = ( bSig <= aSig );
				2301	if ( q ) aSig -= bSig;
				2302	expDiff -= 64;
					/* Each pass consumes 62 exponent steps; the estimate is lowered by 2 */
					/* (clamped at 0) before it is used. */
				2303	while ( 0 < expDiff ) {
				2304	q = estimateDiv128To64( aSig, 0, bSig );
				2305	q = ( 2 < q ) ? q - 2 : 0;
				2306	aSig = - ( ( bSig>>2 ) * q );
				2307	expDiff -= 62;
				2308	}
				2309	expDiff += 64;
				2310	if ( 0 < expDiff ) {
					/* Last partial chunk: keep only the top expDiff bits of the estimate. */
				2311	q = estimateDiv128To64( aSig, 0, bSig );
				2312	q = ( 2 < q ) ? q - 2 : 0;
				2313	q >>= 64 - expDiff;
				2314	bSig >>= 2;
				2315	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2316	}
				2317	else {
				2318	aSig >>= 2;
				2319	bSig >>= 2;
				2320	}
					/* Subtract b until the remainder turns negative; alternateASig keeps */
					/* the value from just before the last subtraction. */
				2321	do {
				2322	alternateASig = aSig;
				2323	++q;
				2324	aSig -= bSig;
				2325	} while ( 0 <= (sbits64) aSig );
					/* Choose between the two candidates: the earlier one wins when the */
					/* sum is negative, or on an exact tie with q odd. */
				2326	sigMean = aSig + alternateASig;
				2327	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2328	aSig = alternateASig;
				2329	}
					/* Take the magnitude; a negative candidate flips the sign of a. */
				2330	zSign = ( (sbits64) aSig < 0 );
				2331	if ( zSign ) aSig = - aSig;
				2332	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
				2333
				2334	}
				2335
				2336	/*
				2337	-------------------------------------------------------------------------------
				2338	Returns the square root of the double-precision floating-point value `a'.
				2339	The operation is performed according to the IEC/IEEE Standard for Binary
				2340	Floating-point Arithmetic.
				2341	-------------------------------------------------------------------------------
				2342	*/
				2343	float64 float64_sqrt( float64 a )
				2344	{
					/*
					 * Computes the square root of a.  Special operands are handled first;
					 * the general case refines an estimate and corrects it only when the
					 * low bits of the estimate are close to zero.
					 */
				2345	flag aSign;
				2346	int16 aExp, zExp;
				2347	bits64 aSig, zSig;
				2348	bits64 rem0, rem1, term0, term1; //, shiftedRem;
				2349	//float64 z;
				2350
				2351	aSig = extractFloat64Frac( a );
				2352	aExp = extractFloat64Exp( a );
				2353	aSign = extractFloat64Sign( a );
				2354	if ( aExp == 0x7FF ) {
					/* NaN: propagate.  +infinity: returned unchanged.  -infinity: invalid. */
				2355	if ( aSig ) return propagateFloat64NaN( a, a );
				2356	if ( ! aSign ) return a;
				2357	float_raise( float_flag_invalid );
				2358	return float64_default_nan;
				2359	}
				2360	if ( aSign ) {
					/* Negative zero is returned unchanged; any other negative value is invalid. */
				2361	if ( ( aExp \| aSig ) == 0 ) return a;
				2362	float_raise( float_flag_invalid );
				2363	return float64_default_nan;
				2364	}
				2365	if ( aExp == 0 ) {
					/* Positive zero returns 0; a subnormal is normalized first. */
				2366	if ( aSig == 0 ) return 0;
				2367	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2368	}
					/* Halve the unbiased exponent and re-bias it for the result. */
				2369	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				2370	aSig \|= LIT64( 0x0010000000000000 );
					/* Initial estimate from estimateSqrt32(), then one division-based refinement. */
				2371	zSig = estimateSqrt32( aExp, aSig>>21 );
				2372	zSig <<= 31;
					/* The shift depends on the parity of the exponent. */
				2373	aSig <<= 9 - ( aExp & 1 );
				2374	zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
					/* Exact correction is only attempted when the low 10 bits are at most 5. */
				2375	if ( ( zSig & 0x3FF ) <= 5 ) {
				2376	if ( zSig < 2 ) {
					/* NOTE(review): presumably the estimate wrapped around; it is pinned to all ones. */
				2377	zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
				2378	}
				2379	else {
					/* Remainder = a - zSig^2; step zSig down while it is negative. */
				2380	aSig <<= 2;
				2381	mul64To128( zSig, zSig, &term0, &term1 );
				2382	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2383	while ( (sbits64) rem0 < 0 ) {
				2384	--zSig;
					/* Adds back 2*zSig + 1, the difference between consecutive squares. */
				2385	shortShift128Left( 0, zSig, 1, &term0, &term1 );
				2386	term1 \|= 1;
				2387	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				2388	}
					/* Record a non-zero remainder in bit 0 of the root. */
				2389	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				2390	}
				2391	}
				2392	shift64RightJamming( zSig, 1, &zSig );
					/* The result sign is always 0 here. */
				2393	return roundAndPackFloat64( 0, zExp, zSig );
				2394
				2395	}
				2396
				2397	/*
				2398	-------------------------------------------------------------------------------
				2399	Returns 1 if the double-precision floating-point value `a' is equal to the
				2400	corresponding value `b', and 0 otherwise. The comparison is performed
				2401	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2402	-------------------------------------------------------------------------------
				2403	*/
				2404	flag float64_eq( float64 a, float64 b )
				2405	{
				2406
				2407	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2408	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2409	) {
				2410	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2411	float_raise( float_flag_invalid );
				2412	}
				2413	return 0;
				2414	}
				2415	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2416
				2417	}
				2418
				2419	/*
				2420	-------------------------------------------------------------------------------
				2421	Returns 1 if the double-precision floating-point value `a' is less than or
				2422	equal to the corresponding value `b', and 0 otherwise. The comparison is
				2423	performed according to the IEC/IEEE Standard for Binary Floating-point
				2424	Arithmetic.
				2425	-------------------------------------------------------------------------------
				2426	*/
				2427	flag float64_le( float64 a, float64 b )
				2428	{
				2429	flag aSign, bSign;
				2430
				2431	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2432	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2433	) {
				2434	float_raise( float_flag_invalid );
				2435	return 0;
				2436	}
				2437	aSign = extractFloat64Sign( a );
				2438	bSign = extractFloat64Sign( b );
				2439	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2440	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2441
				2442	}
				2443
				2444	/*
				2445	-------------------------------------------------------------------------------
				2446	Returns 1 if the double-precision floating-point value `a' is less than
				2447	the corresponding value `b', and 0 otherwise. The comparison is performed
				2448	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2449	-------------------------------------------------------------------------------
				2450	*/
				2451	flag float64_lt( float64 a, float64 b )
				2452	{
				2453	flag aSign, bSign;
				2454
				2455	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2456	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2457	) {
				2458	float_raise( float_flag_invalid );
				2459	return 0;
				2460	}
				2461	aSign = extractFloat64Sign( a );
				2462	bSign = extractFloat64Sign( b );
				2463	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2464	return ( a != b ) && ( aSign ^ ( a < b ) );
				2465
				2466	}
				2467
				2468	/*
				2469	-------------------------------------------------------------------------------
				2470	Returns 1 if the double-precision floating-point value `a' is equal to the
				2471	corresponding value `b', and 0 otherwise. The invalid exception is raised
				2472	if either operand is a NaN. Otherwise, the comparison is performed
				2473	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2474	-------------------------------------------------------------------------------
				2475	*/
				2476	flag float64_eq_signaling( float64 a, float64 b )
				2477	{
				2478
				2479	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2480	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2481	) {
				2482	float_raise( float_flag_invalid );
				2483	return 0;
				2484	}
				2485	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2486
				2487	}
				2488
				2489	/*
				2490	-------------------------------------------------------------------------------
				2491	Returns 1 if the double-precision floating-point value `a' is less than or
				2492	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2493	cause an exception. Otherwise, the comparison is performed according to the
				2494	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2495	-------------------------------------------------------------------------------
				2496	*/
				2497	flag float64_le_quiet( float64 a, float64 b )
				2498	{
				2499	flag aSign, bSign;
				2500	//int16 aExp, bExp;
				2501
				2502	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2503	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2504	) {
				2505	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2506	float_raise( float_flag_invalid );
				2507	}
				2508	return 0;
				2509	}
				2510	aSign = extractFloat64Sign( a );
				2511	bSign = extractFloat64Sign( b );
				2512	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2513	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2514
				2515	}
				2516
				2517	/*
				2518	-------------------------------------------------------------------------------
				2519	Returns 1 if the double-precision floating-point value `a' is less than
				2520	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2521	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2522	Standard for Binary Floating-point Arithmetic.
				2523	-------------------------------------------------------------------------------
				2524	*/
				2525	flag float64_lt_quiet( float64 a, float64 b )
				2526	{
				2527	flag aSign, bSign;
				2528
				2529	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2530	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2531	) {
				2532	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2533	float_raise( float_flag_invalid );
				2534	}
				2535	return 0;
				2536	}
				2537	aSign = extractFloat64Sign( a );
				2538	bSign = extractFloat64Sign( b );
				2539	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2540	return ( a != b ) && ( aSign ^ ( a < b ) );
				2541
				2542	}
				2543
				2544	#ifdef FLOATX80
				2545
				2546	/*
				2547	-------------------------------------------------------------------------------
				2548	Returns the result of converting the extended double-precision floating-
				2549	point value `a' to the 32-bit two's complement integer format. The
				2550	conversion is performed according to the IEC/IEEE Standard for Binary
				2551	Floating-point Arithmetic---which means in particular that the conversion
				2552	is rounded according to the current rounding mode. If `a' is a NaN, the
				2553	largest positive integer is returned. Otherwise, if the conversion
				2554	overflows, the largest integer with the same sign as `a' is returned.
				2555	-------------------------------------------------------------------------------
				2556	*/
				2557	int32 floatx80_to_int32( floatx80 a )
				2558	{
				2559	flag aSign;
				2560	int32 aExp, shiftCount;
				2561	bits64 aSig;
				2562
				2563	aSig = extractFloatx80Frac( a );
				2564	aExp = extractFloatx80Exp( a );
				2565	aSign = extractFloatx80Sign( a );
				2566	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2567	shiftCount = 0x4037 - aExp;
				2568	if ( shiftCount <= 0 ) shiftCount = 1;
				2569	shift64RightJamming( aSig, shiftCount, &aSig );
				2570	return roundAndPackInt32( aSign, aSig );
				2571
				2572	}
				2573
				2574	/*
				2575	-------------------------------------------------------------------------------
				2576	Returns the result of converting the extended double-precision floating-
				2577	point value `a' to the 32-bit two's complement integer format. The
				2578	conversion is performed according to the IEC/IEEE Standard for Binary
				2579	Floating-point Arithmetic, except that the conversion is always rounded
				2580	toward zero. If `a' is a NaN, the largest positive integer is returned.
				2581	Otherwise, if the conversion overflows, the largest integer with the same
				2582	sign as `a' is returned.
				2583	-------------------------------------------------------------------------------
				2584	*/
				2585	int32 floatx80_to_int32_round_to_zero( floatx80 a )
				2586	{
				2587	flag aSign;
				2588	int32 aExp, shiftCount;
				2589	bits64 aSig, savedASig;
				2590	int32 z;
				2591
				2592	aSig = extractFloatx80Frac( a );
				2593	aExp = extractFloatx80Exp( a );
				2594	aSign = extractFloatx80Sign( a );
				2595	shiftCount = 0x403E - aExp;
				2596	if ( shiftCount < 32 ) {
				2597	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2598	goto invalid;
				2599	}
				2600	else if ( 63 < shiftCount ) {
				2601	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				2602	return 0;
				2603	}
				2604	savedASig = aSig;
				2605	aSig >>= shiftCount;
				2606	z = aSig;
				2607	if ( aSign ) z = - z;
				2608	if ( ( z < 0 ) ^ aSign ) {
				2609	invalid:
				2610	float_exception_flags \|= float_flag_invalid;
				2611	return aSign ? 0x80000000 : 0x7FFFFFFF;
				2612	}
				2613	if ( ( aSig<<shiftCount ) != savedASig ) {
				2614	float_exception_flags \|= float_flag_inexact;
				2615	}
				2616	return z;
				2617
				2618	}
				2619
				2620	/*
				2621	-------------------------------------------------------------------------------
				2622	Returns the result of converting the extended double-precision floating-
				2623	point value `a' to the single-precision floating-point format. The
				2624	conversion is performed according to the IEC/IEEE Standard for Binary
				2625	Floating-point Arithmetic.
				2626	-------------------------------------------------------------------------------
				2627	*/
				2628	float32 floatx80_to_float32( floatx80 a )
				2629	{
				2630	flag aSign;
				2631	int32 aExp;
				2632	bits64 aSig;
				2633
				2634	aSig = extractFloatx80Frac( a );
				2635	aExp = extractFloatx80Exp( a );
				2636	aSign = extractFloatx80Sign( a );
				2637	if ( aExp == 0x7FFF ) {
				2638	if ( (bits64) ( aSig<<1 ) ) {
				2639	return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
				2640	}
				2641	return packFloat32( aSign, 0xFF, 0 );
				2642	}
				2643	shift64RightJamming( aSig, 33, &aSig );
				2644	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				2645	return roundAndPackFloat32( aSign, aExp, aSig );
				2646
				2647	}
				2648
				2649	/*
				2650	-------------------------------------------------------------------------------
				2651	Returns the result of converting the extended double-precision floating-
				2652	point value `a' to the double-precision floating-point format. The
				2653	conversion is performed according to the IEC/IEEE Standard for Binary
				2654	Floating-point Arithmetic.
				2655	-------------------------------------------------------------------------------
				2656	*/
				2657	float64 floatx80_to_float64( floatx80 a )
				2658	{
				2659	flag aSign;
				2660	int32 aExp;
				2661	bits64 aSig, zSig;
				2662
				2663	aSig = extractFloatx80Frac( a );
				2664	aExp = extractFloatx80Exp( a );
				2665	aSign = extractFloatx80Sign( a );
				2666	if ( aExp == 0x7FFF ) {
				2667	if ( (bits64) ( aSig<<1 ) ) {
				2668	return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
				2669	}
				2670	return packFloat64( aSign, 0x7FF, 0 );
				2671	}
				2672	shift64RightJamming( aSig, 1, &zSig );
				2673	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				2674	return roundAndPackFloat64( aSign, aExp, zSig );
				2675
				2676	}
				2677
				2678	/*
				2679	-------------------------------------------------------------------------------
				2680	Rounds the extended double-precision floating-point value `a' to an integer,
				2681	and returns the result as an extended quadruple-precision floating-point
				2682	value. The operation is performed according to the IEC/IEEE Standard for
				2683	Binary Floating-point Arithmetic.
				2684	-------------------------------------------------------------------------------
				2685	*/
				2686	floatx80 floatx80_round_to_int( floatx80 a )
				2687	{
				2688	flag aSign;
				2689	int32 aExp;
				2690	bits64 lastBitMask, roundBitsMask;
				2691	int8 roundingMode;
				2692	floatx80 z;
				2693
				2694	aExp = extractFloatx80Exp( a );
				2695	if ( 0x403E <= aExp ) {
				2696	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				2697	return propagateFloatx80NaN( a, a );
				2698	}
				2699	return a;
				2700	}
				2701	if ( aExp <= 0x3FFE ) {
				2702	if ( ( aExp == 0 )
				2703	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				2704	return a;
				2705	}
				2706	float_exception_flags \|= float_flag_inexact;
				2707	aSign = extractFloatx80Sign( a );
				2708	switch ( float_rounding_mode ) {
				2709	case float_round_nearest_even:
				2710	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				2711	) {
				2712	return
				2713	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2714	}
				2715	break;
				2716	case float_round_down:
				2717	return
				2718	aSign ?
				2719	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				2720	: packFloatx80( 0, 0, 0 );
				2721	case float_round_up:
				2722	return
				2723	aSign ? packFloatx80( 1, 0, 0 )
				2724	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2725	}
				2726	return packFloatx80( aSign, 0, 0 );
				2727	}
				2728	lastBitMask = 1;
				2729	lastBitMask <<= 0x403E - aExp;
				2730	roundBitsMask = lastBitMask - 1;
				2731	z = a;
				2732	roundingMode = float_rounding_mode;
				2733	if ( roundingMode == float_round_nearest_even ) {
				2734	z.low += lastBitMask>>1;
				2735	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				2736	}
				2737	else if ( roundingMode != float_round_to_zero ) {
				2738	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				2739	z.low += roundBitsMask;
				2740	}
				2741	}
				2742	z.low &= ~ roundBitsMask;
				2743	if ( z.low == 0 ) {
				2744	++z.high;
				2745	z.low = LIT64( 0x8000000000000000 );
				2746	}
				2747	if ( z.low != a.low ) float_exception_flags \|= float_flag_inexact;
				2748	return z;
				2749
				2750	}
				2751
				2752	/*
				2753	-------------------------------------------------------------------------------
				2754	Returns the result of adding the absolute values of the extended double-
				2755	precision floating-point values `a' and `b'. If `zSign' is true, the sum is
				2756	negated before being returned. `zSign' is ignored if the result is a NaN.
				2757	The addition is performed according to the IEC/IEEE Standard for Binary
				2758	Floating-point Arithmetic.
				2759	-------------------------------------------------------------------------------
				2760	*/
				2761	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2762	{
				2763	int32 aExp, bExp, zExp;
				2764	bits64 aSig, bSig, zSig0, zSig1;
				2765	int32 expDiff;
				2766
				2767	aSig = extractFloatx80Frac( a );
				2768	aExp = extractFloatx80Exp( a );
				2769	bSig = extractFloatx80Frac( b );
				2770	bExp = extractFloatx80Exp( b );
				2771	expDiff = aExp - bExp;
					/* Case 1: `a' has the larger exponent field; b's significand is shifted to match. */
				2772	if ( 0 < expDiff ) {
				2773	if ( aExp == 0x7FFF ) {
					/* Maximum exponent: a NaN if any fraction bit is set, otherwise `a' is returned as is. */
				2774	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2775	return a;
				2776	}
					/* NOTE(review): presumably an exponent field of 0 shares the scale of field 1, hence one less shift -- confirm. */
				2777	if ( bExp == 0 ) --expDiff;
				2778	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2779	zExp = aExp;
				2780	}
					/* Case 2: `b' has the larger exponent field; mirror image of case 1. */
				2781	else if ( expDiff < 0 ) {
				2782	if ( bExp == 0x7FFF ) {
				2783	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
					/* Not a NaN: return maximum exponent with only the top significand bit set, signed by zSign. */
				2784	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2785	}
				2786	if ( aExp == 0 ) ++expDiff;
				2787	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2788	zExp = bExp;
				2789	}
					/* Case 3: equal exponent fields; no alignment shift is needed. */
				2790	else {
				2791	if ( aExp == 0x7FFF ) {
				2792	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2793	return propagateFloatx80NaN( a, b );
				2794	}
				2795	return a;
				2796	}
				2797	zSig1 = 0;
				2798	zSig0 = aSig + bSig;
				2799	if ( aExp == 0 ) {
					/* Both exponent fields are 0: normalize the sum and go straight to rounding. */
				2800	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				2801	goto roundAndPack;
				2802	}
				2803	zExp = aExp;
				2804	goto shiftRight1;
				2805	}
				2806
					/* Cases 1 and 2 join here with the smaller operand already shifted. */
				2807	zSig0 = aSig + bSig;
				2808
					/* Bit 63 of the sum is set: skip the one-bit right shift below. */
				2809	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				2810	shiftRight1:
					/* Shift the zSig0:zSig1 pair right one place, force bit 63 on and bump the exponent. */
				2811	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2812	zSig0 \|= LIT64( 0x8000000000000000 );
				2813	++zExp;
				2814	roundAndPack:
				2815	return
				2816	roundAndPackFloatx80(
				2817	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2818
				2819	}
				2820
				2821	/*
				2822	-------------------------------------------------------------------------------
				2823	Returns the result of subtracting the absolute values of the extended
				2824	double-precision floating-point values `a' and `b'. If `zSign' is true,
				2825	the difference is negated before being returned. `zSign' is ignored if the
				2826	result is a NaN. The subtraction is performed according to the IEC/IEEE
				2827	Standard for Binary Floating-point Arithmetic.
				2828	-------------------------------------------------------------------------------
				2829	*/
				2830	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2831	{
				2832	int32 aExp, bExp, zExp;
				2833	bits64 aSig, bSig, zSig0, zSig1;
				2834	int32 expDiff;
				2835	floatx80 z;
				2836
				2837	aSig = extractFloatx80Frac( a );
				2838	aExp = extractFloatx80Exp( a );
				2839	bSig = extractFloatx80Frac( b );
				2840	bExp = extractFloatx80Exp( b );
				2841	expDiff = aExp - bExp;
				2842	if ( 0 < expDiff ) goto aExpBigger;
				2843	if ( expDiff < 0 ) goto bExpBigger;
					/* From here on the two exponent fields are equal. */
				2844	if ( aExp == 0x7FFF ) {
				2845	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2846	return propagateFloatx80NaN( a, b );
				2847	}
					/* Both operands have the maximum exponent and neither is a NaN: raise invalid, return the default NaN. */
				2848	float_raise( float_flag_invalid );
				2849	z.low = floatx80_default_nan_low;
				2850	z.high = floatx80_default_nan_high;
				2851	return z;
				2852	}
					/* An exponent field of 0 is handled as 1 for the rest of the computation. */
				2853	if ( aExp == 0 ) {
				2854	aExp = 1;
				2855	bExp = 1;
				2856	}
				2857	zSig1 = 0;
				2858	if ( bSig < aSig ) goto aBigger;
				2859	if ( aSig < bSig ) goto bBigger;
					/* Equal significands: the result is zero, with the sign bit set only when rounding down. */
				2860	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
				2861	bExpBigger:
				2862	if ( bExp == 0x7FFF ) {
				2863	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
					/* Not a NaN: maximum exponent, top significand bit only, with the sign inverted. */
				2864	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2865	}
				2866	if ( aExp == 0 ) ++expDiff;
				2867	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2868	bBigger:
					/* Compute b - a and invert the result sign. */
				2869	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				2870	zExp = bExp;
				2871	zSign ^= 1;
				2872	goto normalizeRoundAndPack;
				2873	aExpBigger:
				2874	if ( aExp == 0x7FFF ) {
				2875	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2876	return a;
				2877	}
				2878	if ( bExp == 0 ) --expDiff;
				2879	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2880	aBigger:
					/* Compute a - b; the result keeps zSign. */
				2881	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				2882	zExp = aExp;
				2883	normalizeRoundAndPack:
				2884	return
				2885	normalizeRoundAndPackFloatx80(
				2886	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2887
				2888	}
				2889
				2890	/*
				2891	-------------------------------------------------------------------------------
				2892	Returns the result of adding the extended double-precision floating-point
				2893	values `a' and `b'. The operation is performed according to the IEC/IEEE
				2894	Standard for Binary Floating-point Arithmetic.
				2895	-------------------------------------------------------------------------------
				2896	*/
				2897	floatx80 floatx80_add( floatx80 a, floatx80 b )
				2898	{
				2899	flag aSign, bSign;
				2900
				2901	aSign = extractFloatx80Sign( a );
				2902	bSign = extractFloatx80Sign( b );
				2903	if ( aSign == bSign ) {
				2904	return addFloatx80Sigs( a, b, aSign );
				2905	}
				2906	else {
				2907	return subFloatx80Sigs( a, b, aSign );
				2908	}
				2909
				2910	}
				2911
				2912	/*
				2913	-------------------------------------------------------------------------------
				2914	Returns the result of subtracting the extended double-precision floating-
				2915	point values `a' and `b'. The operation is performed according to the
				2916	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2917	-------------------------------------------------------------------------------
				2918	*/
				2919	floatx80 floatx80_sub( floatx80 a, floatx80 b )
				2920	{
				2921	flag aSign, bSign;
				2922
				2923	aSign = extractFloatx80Sign( a );
				2924	bSign = extractFloatx80Sign( b );
				2925	if ( aSign == bSign ) {
				2926	return subFloatx80Sigs( a, b, aSign );
				2927	}
				2928	else {
				2929	return addFloatx80Sigs( a, b, aSign );
				2930	}
				2931
				2932	}
				2933
				2934	/*
				2935	-------------------------------------------------------------------------------
				2936	Returns the result of multiplying the extended double-precision floating-
				2937	point values `a' and `b'. The operation is performed according to the
				2938	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2939	-------------------------------------------------------------------------------
				2940	*/
				2941	floatx80 floatx80_mul( floatx80 a, floatx80 b )
				2942	{
				2943	flag aSign, bSign, zSign;
				2944	int32 aExp, bExp, zExp;
				2945	bits64 aSig, bSig, zSig0, zSig1;
				2946	floatx80 z;
				2947
				2948	aSig = extractFloatx80Frac( a );
				2949	aExp = extractFloatx80Exp( a );
				2950	aSign = extractFloatx80Sign( a );
				2951	bSig = extractFloatx80Frac( b );
				2952	bExp = extractFloatx80Exp( b );
				2953	bSign = extractFloatx80Sign( b );
				2954	zSign = aSign ^ bSign;
				2955	if ( aExp == 0x7FFF ) {
				2956	if ( (bits64) ( aSig<<1 )
				2957	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				2958	return propagateFloatx80NaN( a, b );
				2959	}
				2960	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				2961	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2962	}
				2963	if ( bExp == 0x7FFF ) {
				2964	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2965	if ( ( aExp \| aSig ) == 0 ) {
				2966	invalid:
				2967	float_raise( float_flag_invalid );
				2968	z.low = floatx80_default_nan_low;
				2969	z.high = floatx80_default_nan_high;
				2970	return z;
				2971	}
				2972	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2973	}
				2974	if ( aExp == 0 ) {
				2975	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2976	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				2977	}
				2978	if ( bExp == 0 ) {
				2979	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2980	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				2981	}
				2982	zExp = aExp + bExp - 0x3FFE;
				2983	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2984	if ( 0 < (sbits64) zSig0 ) {
				2985	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2986	--zExp;
				2987	}
				2988	return
				2989	roundAndPackFloatx80(
				2990	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2991
				2992	}
				2993
				2994	/*
				2995	-------------------------------------------------------------------------------
				2996	Returns the result of dividing the extended double-precision floating-point
				2997	value `a' by the corresponding value `b'. The operation is performed
				2998	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2999	-------------------------------------------------------------------------------
				3000	*/
				3001	floatx80 floatx80_div( floatx80 a, floatx80 b )
				3002	{
				3003	flag aSign, bSign, zSign;
				3004	int32 aExp, bExp, zExp;
				3005	bits64 aSig, bSig, zSig0, zSig1;
				3006	bits64 rem0, rem1, rem2, term0, term1, term2;
				3007	floatx80 z;
				3008
				3009	aSig = extractFloatx80Frac( a );
				3010	aExp = extractFloatx80Exp( a );
				3011	aSign = extractFloatx80Sign( a );
				3012	bSig = extractFloatx80Frac( b );
				3013	bExp = extractFloatx80Exp( b );
				3014	bSign = extractFloatx80Sign( b );
					/* The quotient is negative exactly when the operand signs differ. */
				3015	zSign = aSign ^ bSign;
				3016	if ( aExp == 0x7FFF ) {
				3017	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3018	if ( bExp == 0x7FFF ) {
				3019	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
					/* Both operands have the maximum exponent and neither is a NaN: invalid. */
				3020	goto invalid;
				3021	}
				3022	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3023	}
				3024	if ( bExp == 0x7FFF ) {
				3025	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
					/* Divisor has the maximum exponent and is not a NaN: the quotient is a signed zero. */
				3026	return packFloatx80( zSign, 0, 0 );
				3027	}
				3028	if ( bExp == 0 ) {
				3029	if ( bSig == 0 ) {
					/* Zero divisor: a zero dividend is invalid; anything else raises divide-by-zero. */
				3030	if ( ( aExp \| aSig ) == 0 ) {
				3031	invalid:
				3032	float_raise( float_flag_invalid );
				3033	z.low = floatx80_default_nan_low;
				3034	z.high = floatx80_default_nan_high;
				3035	return z;
				3036	}
				3037	float_raise( float_flag_divbyzero );
				3038	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3039	}
				3040	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3041	}
				3042	if ( aExp == 0 ) {
				3043	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3044	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3045	}
				3046	zExp = aExp - bExp + 0x3FFE;
				3047	rem1 = 0;
					/* Halve the dividend when it is not smaller than the divisor, compensating in the exponent. */
				3048	if ( bSig <= aSig ) {
				3049	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3050	++zExp;
				3051	}
					/* High quotient word: estimate, then step the estimate down while the remainder is negative. */
				3052	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3053	mul64To128( bSig, zSig0, &term0, &term1 );
				3054	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3055	while ( (sbits64) rem0 < 0 ) {
				3056	--zSig0;
				3057	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3058	}
					/* Low quotient word: the estimate is corrected only when its value (ignoring bit 63) is at most 4. */
				3059	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3060	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3061	mul64To128( bSig, zSig1, &term1, &term2 );
				3062	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3063	while ( (sbits64) rem1 < 0 ) {
				3064	--zSig1;
				3065	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3066	}
					/* Fold any nonzero remainder into the lowest bit as a sticky bit. */
				3067	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3068	}
				3069	return
				3070	roundAndPackFloatx80(
				3071	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				3072
				3073	}
				3074
				3075	/*
				3076	-------------------------------------------------------------------------------
				3077	Returns the remainder of the extended double-precision floating-point value
				3078	`a' with respect to the corresponding value `b'. The operation is performed
				3079	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3080	-------------------------------------------------------------------------------
				3081	*/
				3082	floatx80 floatx80_rem( floatx80 a, floatx80 b )
				3083	{
				3084	flag aSign, bSign, zSign;
				3085	int32 aExp, bExp, expDiff;
				3086	bits64 aSig0, aSig1, bSig;
				3087	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3088	floatx80 z;
				3089
				3090	aSig0 = extractFloatx80Frac( a );
				3091	aExp = extractFloatx80Exp( a );
				3092	aSign = extractFloatx80Sign( a );
				3093	bSig = extractFloatx80Frac( b );
				3094	bExp = extractFloatx80Exp( b );
					/* NOTE(review): bSign is assigned here but never read again in this function. */
				3095	bSign = extractFloatx80Sign( b );
				3096	if ( aExp == 0x7FFF ) {
				3097	if ( (bits64) ( aSig0<<1 )
				3098	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3099	return propagateFloatx80NaN( a, b );
				3100	}
					/* Dividend has the maximum exponent and no NaN is involved: invalid. */
				3101	goto invalid;
				3102	}
				3103	if ( bExp == 0x7FFF ) {
				3104	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
					/* Divisor has the maximum exponent and is not a NaN: `a' is returned unchanged. */
				3105	return a;
				3106	}
				3107	if ( bExp == 0 ) {
				3108	if ( bSig == 0 ) {
					/* Zero divisor: raise invalid and return the default NaN. */
				3109	invalid:
				3110	float_raise( float_flag_invalid );
				3111	z.low = floatx80_default_nan_low;
				3112	z.high = floatx80_default_nan_high;
				3113	return z;
				3114	}
				3115	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3116	}
				3117	if ( aExp == 0 ) {
				3118	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3119	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3120	}
				3121	bSig \|= LIT64( 0x8000000000000000 );
					/* The remainder starts with the sign of `a'; it may be flipped at the end. */
				3122	zSign = aSign;
				3123	expDiff = aExp - bExp;
				3124	aSig1 = 0;
				3125	if ( expDiff < 0 ) {
					/* Exponent of `a' is more than one below that of `b': `a' is returned unchanged. */
				3126	if ( expDiff < -1 ) return a;
				3127	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3128	expDiff = 0;
				3129	}
				3130	q = ( bSig <= aSig0 );
				3131	if ( q ) aSig0 -= bSig;
				3132	expDiff -= 64;
					/* Reduce the dividend 62 exponent steps at a time, using a deliberately low quotient estimate. */
				3133	while ( 0 < expDiff ) {
				3134	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3135	q = ( 2 < q ) ? q - 2 : 0;
				3136	mul64To128( bSig, q, &term0, &term1 );
				3137	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3138	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3139	expDiff -= 62;
				3140	}
				3141	expDiff += 64;
				3142	if ( 0 < expDiff ) {
					/* Final partial step: keep only the top expDiff quotient bits, then correct upward. */
				3143	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3144	q = ( 2 < q ) ? q - 2 : 0;
				3145	q >>= 64 - expDiff;
				3146	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3147	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3148	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3149	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3150	++q;
				3151	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3152	}
				3153	}
				3154	else {
				3155	term1 = 0;
				3156	term0 = bSig;
				3157	}
					/* Choose between the remainder and (term - remainder): take the smaller, or on a tie the alternate when q is odd. */
				3158	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3159	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3160	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3161	&& ( q & 1 ) )
				3162	) {
				3163	aSig0 = alternateASig0;
				3164	aSig1 = alternateASig1;
				3165	zSign = ! zSign;
				3166	}
					/* The rounding precision passed here is the constant 80, not floatx80_rounding_precision. */
				3167	return
				3168	normalizeRoundAndPackFloatx80(
				3169	80, zSign, bExp + expDiff, aSig0, aSig1 );
				3170
				3171	}
				3172
				3173	/*
				3174	-------------------------------------------------------------------------------
				3175	Returns the square root of the extended double-precision floating-point
				3176	value `a'. The operation is performed according to the IEC/IEEE Standard
				3177	for Binary Floating-point Arithmetic.
				3178	-------------------------------------------------------------------------------
				3179	*/
				3180	floatx80 floatx80_sqrt( floatx80 a )
				3181	{
				3182	flag aSign;
				3183	int32 aExp, zExp;
				3184	bits64 aSig0, aSig1, zSig0, zSig1;
				3185	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3186	bits64 shiftedRem0, shiftedRem1;
				3187	floatx80 z;
				3188
				3189	aSig0 = extractFloatx80Frac( a );
				3190	aExp = extractFloatx80Exp( a );
				3191	aSign = extractFloatx80Sign( a );
				3192	if ( aExp == 0x7FFF ) {
					/* Maximum exponent: propagate a NaN, return a positive input as is, reject a negative one. */
				3193	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
				3194	if ( ! aSign ) return a;
				3195	goto invalid;
				3196	}
				3197	if ( aSign ) {
					/* Negative input: a negative zero is returned unchanged, anything else is invalid. */
				3198	if ( ( aExp \| aSig0 ) == 0 ) return a;
				3199	invalid:
				3200	float_raise( float_flag_invalid );
				3201	z.low = floatx80_default_nan_low;
				3202	z.high = floatx80_default_nan_high;
				3203	return z;
				3204	}
				3205	if ( aExp == 0 ) {
				3206	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				3207	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3208	}
					/* Result exponent: half of the unbiased input exponent, biased again. */
				3209	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
					/* Start from a 32-bit root estimate and refine it with one division step. */
				3210	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				3211	zSig0 <<= 31;
				3212	aSig1 = 0;
				3213	shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
				3214	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
					/* If bit 63 of the refined estimate is clear, use the all-ones value instead. */
				3215	if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
				3216	shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
				3217	mul64To128( zSig0, zSig0, &term0, &term1 );
				3218	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
					/* Step the high root word down until the remainder is no longer negative. */
				3219	while ( (sbits64) rem0 < 0 ) {
				3220	--zSig0;
				3221	shortShift128Left( 0, zSig0, 1, &term0, &term1 );
				3222	term1 \|= 1;
				3223	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				3224	}
					/* Low root word: estimated from the remainder, corrected only when its value (ignoring bit 63) is at most 5. */
				3225	shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
				3226	zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
				3227	if ( (bits64) ( zSig1<<1 ) <= 10 ) {
				3228	if ( zSig1 == 0 ) zSig1 = 1;
				3229	mul64To128( zSig0, zSig1, &term1, &term2 );
				3230	shortShift128Left( term1, term2, 1, &term1, &term2 );
				3231	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3232	mul64To128( zSig1, zSig1, &term2, &term3 );
				3233	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				3234	while ( (sbits64) rem1 < 0 ) {
				3235	--zSig1;
				3236	shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
				3237	term3 \|= 1;
				3238	add192(
				3239	rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
				3240	}
					/* Fold any nonzero remainder into the lowest bit as a sticky bit. */
				3241	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				3242	}
					/* The result sign passed to the packing routine is always 0. */
				3243	return
				3244	roundAndPackFloatx80(
				3245	floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
				3246
				3247	}
				3248
				3249	/*
				3250	-------------------------------------------------------------------------------
				3251	Returns 1 if the extended double-precision floating-point value `a' is
				3252	equal to the corresponding value `b', and 0 otherwise. The comparison is
				3253	performed according to the IEC/IEEE Standard for Binary Floating-point
				3254	Arithmetic.
				3255	-------------------------------------------------------------------------------
				3256	*/
				3257	flag floatx80_eq( floatx80 a, floatx80 b )
				3258	{
				3259
				3260	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3261	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3262	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3263	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3264	) {
				3265	if ( floatx80_is_signaling_nan( a )
				3266	\|\| floatx80_is_signaling_nan( b ) ) {
				3267	float_raise( float_flag_invalid );
				3268	}
				3269	return 0;
				3270	}
				3271	return
				3272	( a.low == b.low )
				3273	&& ( ( a.high == b.high )
				3274	\|\| ( ( a.low == 0 )
				3275	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3276	);
				3277
				3278	}
				3279
				3280	/*
				3281	-------------------------------------------------------------------------------
				3282	Returns 1 if the extended double-precision floating-point value `a' is
				3283	less than or equal to the corresponding value `b', and 0 otherwise. The
				3284	comparison is performed according to the IEC/IEEE Standard for Binary
				3285	Floating-point Arithmetic.
				3286	-------------------------------------------------------------------------------
				3287	*/
				3288	flag floatx80_le( floatx80 a, floatx80 b )
				3289	{
				3290	flag aSign, bSign;
				3291
				3292	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3293	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3294	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3295	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3296	) {
				3297	float_raise( float_flag_invalid );
				3298	return 0;
				3299	}
				3300	aSign = extractFloatx80Sign( a );
				3301	bSign = extractFloatx80Sign( b );
				3302	if ( aSign != bSign ) {
				3303	return
				3304	aSign
				3305	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3306	== 0 );
				3307	}
				3308	return
				3309	aSign ? le128( b.high, b.low, a.high, a.low )
				3310	: le128( a.high, a.low, b.high, b.low );
				3311
				3312	}
				3313
				3314	/*
				3315	-------------------------------------------------------------------------------
				3316	Returns 1 if the extended double-precision floating-point value `a' is
				3317	less than the corresponding value `b', and 0 otherwise. The comparison
				3318	is performed according to the IEC/IEEE Standard for Binary Floating-point
				3319	Arithmetic.
				3320	-------------------------------------------------------------------------------
				3321	*/
				3322	flag floatx80_lt( floatx80 a, floatx80 b )
				3323	{
				3324	flag aSign, bSign;
				3325
				3326	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3327	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3328	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3329	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3330	) {
				3331	float_raise( float_flag_invalid );
				3332	return 0;
				3333	}
				3334	aSign = extractFloatx80Sign( a );
				3335	bSign = extractFloatx80Sign( b );
				3336	if ( aSign != bSign ) {
				3337	return
				3338	aSign
				3339	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3340	!= 0 );
				3341	}
				3342	return
				3343	aSign ? lt128( b.high, b.low, a.high, a.low )
				3344	: lt128( a.high, a.low, b.high, b.low );
				3345
				3346	}
				3347
				3348	/*
				3349	-------------------------------------------------------------------------------
				3350	Returns 1 if the extended double-precision floating-point value `a' is equal
				3351	to the corresponding value `b', and 0 otherwise. The invalid exception is
				3352	raised if either operand is a NaN. Otherwise, the comparison is performed
				3353	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3354	-------------------------------------------------------------------------------
				3355	*/
				3356	flag floatx80_eq_signaling( floatx80 a, floatx80 b )
				3357	{
				3358
				3359	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3360	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3361	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3362	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3363	) {
				3364	float_raise( float_flag_invalid );
				3365	return 0;
				3366	}
				3367	return
				3368	( a.low == b.low )
				3369	&& ( ( a.high == b.high )
				3370	\|\| ( ( a.low == 0 )
				3371	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3372	);
				3373
				3374	}
				3375
				3376	/*
				3377	-------------------------------------------------------------------------------
				3378	Returns 1 if the extended double-precision floating-point value `a' is less
				3379	than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				3380	do not cause an exception. Otherwise, the comparison is performed according
				3381	to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3382	-------------------------------------------------------------------------------
				3383	*/
				3384	flag floatx80_le_quiet( floatx80 a, floatx80 b )
				3385	{
				3386	flag aSign, bSign;
				3387
				3388	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3389	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3390	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3391	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3392	) {
				3393	if ( floatx80_is_signaling_nan( a )
				3394	\|\| floatx80_is_signaling_nan( b ) ) {
				3395	float_raise( float_flag_invalid );
				3396	}
				3397	return 0;
				3398	}
				3399	aSign = extractFloatx80Sign( a );
				3400	bSign = extractFloatx80Sign( b );
				3401	if ( aSign != bSign ) {
				3402	return
				3403	aSign
				3404	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3405	== 0 );
				3406	}
				3407	return
				3408	aSign ? le128( b.high, b.low, a.high, a.low )
				3409	: le128( a.high, a.low, b.high, b.low );
				3410
				3411	}
				3412
				3413	/*
				3414	-------------------------------------------------------------------------------
				3415	Returns 1 if the extended double-precision floating-point value `a' is less
				3416	than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				3417	an exception. Otherwise, the comparison is performed according to the
				3418	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3419	-------------------------------------------------------------------------------
				3420	*/
				3421	flag floatx80_lt_quiet( floatx80 a, floatx80 b )
				3422	{
				3423	flag aSign, bSign;
				3424
				3425	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3426	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3427	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3428	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3429	) {
				3430	if ( floatx80_is_signaling_nan( a )
				3431	\|\| floatx80_is_signaling_nan( b ) ) {
				3432	float_raise( float_flag_invalid );
				3433	}
				3434	return 0;
				3435	}
				3436	aSign = extractFloatx80Sign( a );
				3437	bSign = extractFloatx80Sign( b );
				3438	if ( aSign != bSign ) {
				3439	return
				3440	aSign
				3441	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3442	!= 0 );
				3443	}
				3444	return
				3445	aSign ? lt128( b.high, b.low, a.high, a.low )
				3446	: lt128( a.high, a.low, b.high, b.low );
				3447
				3448	}
				3449
				3450	#endif
				3451