Blame - arch/arm26/nwfpe/softfloat.c - kernel/msm

blob: 26c1b916e5279d40cbb9170aa43fc477d7924c05 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	===============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point
				5	Arithmetic Package, Release 2.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				15	arithmetic/softfloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				18	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				19	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				20	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				21	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				22
				23	Derivative works are acceptable, even for commercial purposes, so long as
				24	(1) they include prominent notice that the work is derivative, and (2) they
				25	include prominent notice akin to these three paragraphs for those parts of
				26	this code that are retained.
				27
				28	===============================================================================
				29	*/
				30
				31	#include "fpa11.h"
				32	#include "milieu.h"
				33	#include "softfloat.h"
				34
				35	/*
				36	-------------------------------------------------------------------------------
				37	Floating-point rounding mode, extended double-precision rounding precision,
				38	and exception flags.
				39	-------------------------------------------------------------------------------
				40	*/
				41	int8 float_rounding_mode = float_round_nearest_even; /* current rounding mode; read by the roundAndPack* routines */
				42	int8 floatx80_rounding_precision = 80; /* rounding precision for extended double results; starts at 80 */
				43	int8 float_exception_flags; /* accumulated float_flag_* bits; rounding code ORs flags into it */
				44
				45	/*
				46	-------------------------------------------------------------------------------
				47	Primitive arithmetic functions, including multi-word arithmetic, and
				48	division and square root approximations. (Can be specialized to target if
				49	desired.)
				50	-------------------------------------------------------------------------------
				51	*/
				52	#include "softfloat-macros"
				53
				54	/*
				55	-------------------------------------------------------------------------------
				56	Functions and definitions to determine: (1) whether tininess for underflow
				57	is detected before or after rounding by default, (2) what (if anything)
				58	happens when exceptions are raised, (3) how signaling NaNs are distinguished
				59	from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				60	are propagated from function inputs to output. These details are target-
				61	specific.
				62	-------------------------------------------------------------------------------
				63	*/
				64	#include "softfloat-specialize"
				65
				66	/*
				67	-------------------------------------------------------------------------------
				68	Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				69	and 7, and returns the properly rounded 32-bit integer corresponding to the
				70	input. If `zSign' is nonzero, the input is negated before being converted
				71	to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
				72	input is simply rounded to an integer, with the inexact exception raised if
				73	the input cannot be represented exactly as an integer. If the fixed-point
				74	input is too large, however, the invalid exception is raised and the largest
				75	positive or negative integer is returned.
				76	-------------------------------------------------------------------------------
				77	*/
				78	static int32 roundAndPackInt32( flag zSign, bits64 absZ )
				79	{
				80	int8 roundingMode;
				81	flag roundNearestEven;
				82	int8 roundIncrement, roundBits;
				83	int32 z;
				84
				85	roundingMode = float_rounding_mode;
				86	roundNearestEven = ( roundingMode == float_round_nearest_even );
				87	roundIncrement = 0x40;
				88	if ( ! roundNearestEven ) {
				89	if ( roundingMode == float_round_to_zero ) {
				90	roundIncrement = 0;
				91	}
				92	else {
				93	roundIncrement = 0x7F;
				94	if ( zSign ) {
				95	if ( roundingMode == float_round_up ) roundIncrement = 0;
				96	}
				97	else {
				98	if ( roundingMode == float_round_down ) roundIncrement = 0;
				99	}
				100	}
				101	}
				102	roundBits = absZ & 0x7F;
				103	absZ = ( absZ + roundIncrement )>>7;
				104	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				105	z = absZ;
				106	if ( zSign ) z = - z;
				107	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				108	float_exception_flags \|= float_flag_invalid;
				109	return zSign ? 0x80000000 : 0x7FFFFFFF;
				110	}
				111	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				112	return z;
				113
				114	}
				115
				116	/*
				117	-------------------------------------------------------------------------------
				118	Returns the fraction bits of the single-precision floating-point value `a'.
				119	-------------------------------------------------------------------------------
				120	*/
				121	INLINE bits32 extractFloat32Frac( float32 a )
				122	{
				123
				124	return a & 0x007FFFFF;
				125
				126	}
				127
				128	/*
				129	-------------------------------------------------------------------------------
				130	Returns the exponent bits of the single-precision floating-point value `a'.
				131	-------------------------------------------------------------------------------
				132	*/
				133	INLINE int16 extractFloat32Exp( float32 a )
				134	{
				135
				136	return ( a>>23 ) & 0xFF;
				137
				138	}
				139
				140	/*
				141	-------------------------------------------------------------------------------
				142	Returns the sign bit of the single-precision floating-point value `a'.
				143	-------------------------------------------------------------------------------
				144	*/
				145	INLINE flag extractFloat32Sign( float32 a )
				146	{
				147
				148	return a>>31;
				149
				150	}
				151
				152	/*
				153	-------------------------------------------------------------------------------
				154	Normalizes the subnormal single-precision floating-point value represented
				155	by the denormalized significand `aSig'. The normalized exponent and
				156	significand are stored at the locations pointed to by `zExpPtr' and
				157	`zSigPtr', respectively.
				158	-------------------------------------------------------------------------------
				159	*/
				160	static void
				161	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				162	{
				163	int8 shiftCount;
				164
				165	shiftCount = countLeadingZeros32( aSig ) - 8;
				166	*zSigPtr = aSig<<shiftCount;
				167	*zExpPtr = 1 - shiftCount;
				168
				169	}
				170
				171	/*
				172	-------------------------------------------------------------------------------
				173	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				174	single-precision floating-point value, returning the result. After being
				175	shifted into the proper positions, the three fields are simply added
				176	together to form the result. This means that any integer portion of `zSig'
				177	will be added into the exponent. Since a properly normalized significand
				178	will have an integer portion equal to 1, the `zExp' input should be 1 less
				179	than the desired result exponent whenever `zSig' is a complete, normalized
				180	significand.
				181	-------------------------------------------------------------------------------
				182	*/
				183	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				184	{
				185	#if 0
				186	float32 f;
				187	__asm__("@ packFloat32; \n\
				188	mov %0, %1, asl #31; \n\
				189	orr %0, %2, asl #23; \n\
				190	orr %0, %3"
				191	: /* no outputs */
				192	: "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
				193	: "cc");
				194	return f;
				195	#else
				196	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
				197	#endif
				198	}
				199
				200	/*
				201	-------------------------------------------------------------------------------
				202	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				203	and significand `zSig', and returns the proper single-precision floating-
				204	point value corresponding to the abstract input. Ordinarily, the abstract
				205	value is simply rounded and packed into the single-precision format, with
				206	the inexact exception raised if the abstract input cannot be represented
				207	exactly. If the abstract value is too large, however, the overflow and
				208	inexact exceptions are raised and an infinity or maximal finite value is
				209	returned. If the abstract value is too small, the input value is rounded to
				210	a subnormal number, and the underflow and inexact exceptions are raised if
				211	the abstract input cannot be represented exactly as a subnormal single-
				212	precision floating-point number.
				213	The input significand `zSig' has its binary point between bits 30
				214	and 29, which is 7 bits to the left of the usual location. This shifted
				215	significand must be normalized or smaller. If `zSig' is not normalized,
				216	`zExp' must be 0; in that case, the result returned is a subnormal number,
				217	and it must not require rounding. In the usual case that `zSig' is
				218	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				219	The handling of underflow and overflow follows the IEC/IEEE Standard for
				220	Binary Floating-point Arithmetic.
				221	-------------------------------------------------------------------------------
				222	*/
				223	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				224	{
				225	int8 roundingMode;
				226	flag roundNearestEven;
				227	int8 roundIncrement, roundBits;
				228	flag isTiny;
				229
				230	roundingMode = float_rounding_mode;
				231	roundNearestEven = ( roundingMode == float_round_nearest_even );
				232	roundIncrement = 0x40;
				233	if ( ! roundNearestEven ) {
				234	if ( roundingMode == float_round_to_zero ) {
				235	roundIncrement = 0;
				236	}
				237	else {
				238	roundIncrement = 0x7F;
				239	if ( zSign ) {
				240	if ( roundingMode == float_round_up ) roundIncrement = 0;
				241	}
				242	else {
				243	if ( roundingMode == float_round_down ) roundIncrement = 0;
				244	}
				245	}
				246	}
				247	roundBits = zSig & 0x7F;
				248	if ( 0xFD <= (bits16) zExp ) {
				249	if ( ( 0xFD < zExp )
				250	\|\| ( ( zExp == 0xFD )
				251	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				252	) {
				253	float_raise( float_flag_overflow \| float_flag_inexact );
				254	return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
				255	}
				256	if ( zExp < 0 ) {
				257	isTiny =
				258	( float_detect_tininess == float_tininess_before_rounding )
				259	\|\| ( zExp < -1 )
				260	\|\| ( zSig + roundIncrement < 0x80000000 );
				261	shift32RightJamming( zSig, - zExp, &zSig );
				262	zExp = 0;
				263	roundBits = zSig & 0x7F;
				264	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				265	}
				266	}
				267	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				268	zSig = ( zSig + roundIncrement )>>7;
				269	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				270	if ( zSig == 0 ) zExp = 0;
				271	return packFloat32( zSign, zExp, zSig );
				272
				273	}
				274
				275	/*
				276	-------------------------------------------------------------------------------
				277	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				278	and significand `zSig', and returns the proper single-precision floating-
				279	point value corresponding to the abstract input. This routine is just like
				280	`roundAndPackFloat32' except that `zSig' does not have to be normalized in
				281	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				282	point exponent.
				283	-------------------------------------------------------------------------------
				284	*/
				285	static float32
				286	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				287	{
				288	int8 shiftCount;
				289
				290	shiftCount = countLeadingZeros32( zSig ) - 1;
				291	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
				292
				293	}
				294
				295	/*
				296	-------------------------------------------------------------------------------
				297	Returns the fraction bits of the double-precision floating-point value `a'.
				298	-------------------------------------------------------------------------------
				299	*/
				300	INLINE bits64 extractFloat64Frac( float64 a )
				301	{
				302
				303	return a & LIT64( 0x000FFFFFFFFFFFFF );
				304
				305	}
				306
				307	/*
				308	-------------------------------------------------------------------------------
				309	Returns the exponent bits of the double-precision floating-point value `a'.
				310	-------------------------------------------------------------------------------
				311	*/
				312	INLINE int16 extractFloat64Exp( float64 a )
				313	{
				314
				315	return ( a>>52 ) & 0x7FF;
				316
				317	}
				318
				319	/*
				320	-------------------------------------------------------------------------------
				321	Returns the sign bit of the double-precision floating-point value `a'.
				322	-------------------------------------------------------------------------------
				323	*/
				324	INLINE flag extractFloat64Sign( float64 a )
				325	{
				326
				327	return a>>63;
				328
				329	}
				330
				331	/*
				332	-------------------------------------------------------------------------------
				333	Normalizes the subnormal double-precision floating-point value represented
				334	by the denormalized significand `aSig'. The normalized exponent and
				335	significand are stored at the locations pointed to by `zExpPtr' and
				336	`zSigPtr', respectively.
				337	-------------------------------------------------------------------------------
				338	*/
				339	static void
				340	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				341	{
				342	int8 shiftCount;
				343
				344	shiftCount = countLeadingZeros64( aSig ) - 11;
				345	*zSigPtr = aSig<<shiftCount;
				346	*zExpPtr = 1 - shiftCount;
				347
				348	}
				349
				350	/*
				351	-------------------------------------------------------------------------------
				352	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				353	double-precision floating-point value, returning the result. After being
				354	shifted into the proper positions, the three fields are simply added
				355	together to form the result. This means that any integer portion of `zSig'
				356	will be added into the exponent. Since a properly normalized significand
				357	will have an integer portion equal to 1, the `zExp' input should be 1 less
				358	than the desired result exponent whenever `zSig' is a complete, normalized
				359	significand.
				360	-------------------------------------------------------------------------------
				361	*/
				362	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				363	{
				364
				365	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
				366
				367	}
				368
				369	/*
				370	-------------------------------------------------------------------------------
				371	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				372	and significand `zSig', and returns the proper double-precision floating-
				373	point value corresponding to the abstract input. Ordinarily, the abstract
				374	value is simply rounded and packed into the double-precision format, with
				375	the inexact exception raised if the abstract input cannot be represented
				376	exactly. If the abstract value is too large, however, the overflow and
				377	inexact exceptions are raised and an infinity or maximal finite value is
				378	returned. If the abstract value is too small, the input value is rounded to
				379	a subnormal number, and the underflow and inexact exceptions are raised if
				380	the abstract input cannot be represented exactly as a subnormal double-
				381	precision floating-point number.
				382	The input significand `zSig' has its binary point between bits 62
				383	and 61, which is 10 bits to the left of the usual location. This shifted
				384	significand must be normalized or smaller. If `zSig' is not normalized,
				385	`zExp' must be 0; in that case, the result returned is a subnormal number,
				386	and it must not require rounding. In the usual case that `zSig' is
				387	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				388	The handling of underflow and overflow follows the IEC/IEEE Standard for
				389	Binary Floating-point Arithmetic.
				390	-------------------------------------------------------------------------------
				391	*/
				392	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				393	{
				394	int8 roundingMode;
				395	flag roundNearestEven;
				396	int16 roundIncrement, roundBits;
				397	flag isTiny;
				398
				399	roundingMode = float_rounding_mode;
				400	roundNearestEven = ( roundingMode == float_round_nearest_even );
				401	roundIncrement = 0x200;
				402	if ( ! roundNearestEven ) {
				403	if ( roundingMode == float_round_to_zero ) {
				404	roundIncrement = 0;
				405	}
				406	else {
				407	roundIncrement = 0x3FF;
				408	if ( zSign ) {
				409	if ( roundingMode == float_round_up ) roundIncrement = 0;
				410	}
				411	else {
				412	if ( roundingMode == float_round_down ) roundIncrement = 0;
				413	}
				414	}
				415	}
				416	roundBits = zSig & 0x3FF;
				417	if ( 0x7FD <= (bits16) zExp ) {
				418	if ( ( 0x7FD < zExp )
				419	\|\| ( ( zExp == 0x7FD )
				420	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				421	) {
				422	//register int lr = __builtin_return_address(0);
				423	//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
				424	float_raise( float_flag_overflow \| float_flag_inexact );
				425	return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
				426	}
				427	if ( zExp < 0 ) {
				428	isTiny =
				429	( float_detect_tininess == float_tininess_before_rounding )
				430	\|\| ( zExp < -1 )
				431	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				432	shift64RightJamming( zSig, - zExp, &zSig );
				433	zExp = 0;
				434	roundBits = zSig & 0x3FF;
				435	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				436	}
				437	}
				438	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				439	zSig = ( zSig + roundIncrement )>>10;
				440	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				441	if ( zSig == 0 ) zExp = 0;
				442	return packFloat64( zSign, zExp, zSig );
				443
				444	}
				445
				446	/*
				447	-------------------------------------------------------------------------------
				448	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				449	and significand `zSig', and returns the proper double-precision floating-
				450	point value corresponding to the abstract input. This routine is just like
				451	`roundAndPackFloat64' except that `zSig' does not have to be normalized in
				452	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				453	point exponent.
				454	-------------------------------------------------------------------------------
				455	*/
				456	static float64
				457	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				458	{
				459	int8 shiftCount;
				460
				461	shiftCount = countLeadingZeros64( zSig ) - 1;
				462	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
				463
				464	}
				465
				466	#ifdef FLOATX80
				467
				468	/*
				469	-------------------------------------------------------------------------------
				470	Returns the fraction bits of the extended double-precision floating-point
				471	value `a'.
				472	-------------------------------------------------------------------------------
				473	*/
				474	INLINE bits64 extractFloatx80Frac( floatx80 a )
				475	{
				476
				477	return a.low;
				478
				479	}
				480
				481	/*
				482	-------------------------------------------------------------------------------
				483	Returns the exponent bits of the extended double-precision floating-point
				484	value `a'.
				485	-------------------------------------------------------------------------------
				486	*/
				487	INLINE int32 extractFloatx80Exp( floatx80 a )
				488	{
				489
				490	return a.high & 0x7FFF;
				491
				492	}
				493
				494	/*
				495	-------------------------------------------------------------------------------
				496	Returns the sign bit of the extended double-precision floating-point value
				497	`a'.
				498	-------------------------------------------------------------------------------
				499	*/
				500	INLINE flag extractFloatx80Sign( floatx80 a )
				501	{
				502
				503	return a.high>>15;
				504
				505	}
				506
				507	/*
				508	-------------------------------------------------------------------------------
				509	Normalizes the subnormal extended double-precision floating-point value
				510	represented by the denormalized significand `aSig'. The normalized exponent
				511	and significand are stored at the locations pointed to by `zExpPtr' and
				512	`zSigPtr', respectively.
				513	-------------------------------------------------------------------------------
				514	*/
				515	static void
				516	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				517	{
				518	int8 shiftCount;
				519
				520	shiftCount = countLeadingZeros64( aSig );
				521	*zSigPtr = aSig<<shiftCount;
				522	*zExpPtr = 1 - shiftCount;
				523
				524	}
				525
				526	/*
				527	-------------------------------------------------------------------------------
				528	Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				529	extended double-precision floating-point value, returning the result.
				530	-------------------------------------------------------------------------------
				531	*/
				532	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				533	{
				534	floatx80 z;
				535
				536	z.low = zSig;
				537	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				538	return z;
				539
				540	}
				541
				542	/*
				543	-------------------------------------------------------------------------------
				544	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				545	and extended significand formed by the concatenation of `zSig0' and `zSig1',
				546	and returns the proper extended double-precision floating-point value
				547	corresponding to the abstract input. Ordinarily, the abstract value is
				548	rounded and packed into the extended double-precision format, with the
				549	inexact exception raised if the abstract input cannot be represented
				550	exactly. If the abstract value is too large, however, the overflow and
				551	inexact exceptions are raised and an infinity or maximal finite value is
				552	returned. If the abstract value is too small, the input value is rounded to
				553	a subnormal number, and the underflow and inexact exceptions are raised if
				554	the abstract input cannot be represented exactly as a subnormal extended
				555	double-precision floating-point number.
				556	If `roundingPrecision' is 32 or 64, the result is rounded to the same
				557	number of bits as single or double precision, respectively. Otherwise, the
				558	result is rounded to the full precision of the extended double-precision
				559	format.
				560	The input significand must be normalized or smaller. If the input
				561	significand is not normalized, `zExp' must be 0; in that case, the result
				562	returned is a subnormal number, and it must not require rounding. The
				563	handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				564	Floating-point Arithmetic.
				565	-------------------------------------------------------------------------------
				566	*/
				567	static floatx80
				568	roundAndPackFloatx80(
				569	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				570	)
				571	{
				572	int8 roundingMode;
				573	flag roundNearestEven, increment, isTiny;
				574	int64 roundIncrement, roundMask, roundBits;
				575
				576	roundingMode = float_rounding_mode;
				577	roundNearestEven = ( roundingMode == float_round_nearest_even );
				578	if ( roundingPrecision == 80 ) goto precision80;
				579	if ( roundingPrecision == 64 ) {
				580	roundIncrement = LIT64( 0x0000000000000400 );
				581	roundMask = LIT64( 0x00000000000007FF );
				582	}
				583	else if ( roundingPrecision == 32 ) {
				584	roundIncrement = LIT64( 0x0000008000000000 );
				585	roundMask = LIT64( 0x000000FFFFFFFFFF );
				586	}
				587	else {
				588	goto precision80;
				589	}
				590	zSig0 \|= ( zSig1 != 0 );
				591	if ( ! roundNearestEven ) {
				592	if ( roundingMode == float_round_to_zero ) {
				593	roundIncrement = 0;
				594	}
				595	else {
				596	roundIncrement = roundMask;
				597	if ( zSign ) {
				598	if ( roundingMode == float_round_up ) roundIncrement = 0;
				599	}
				600	else {
				601	if ( roundingMode == float_round_down ) roundIncrement = 0;
				602	}
				603	}
				604	}
				605	roundBits = zSig0 & roundMask;
				606	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				607	if ( ( 0x7FFE < zExp )
				608	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				609	) {
				610	goto overflow;
				611	}
				612	if ( zExp <= 0 ) {
				613	isTiny =
				614	( float_detect_tininess == float_tininess_before_rounding )
				615	\|\| ( zExp < 0 )
				616	\|\| ( zSig0 <= zSig0 + roundIncrement );
				617	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				618	zExp = 0;
				619	roundBits = zSig0 & roundMask;
				620	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				621	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				622	zSig0 += roundIncrement;
				623	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				624	roundIncrement = roundMask + 1;
				625	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				626	roundMask \|= roundIncrement;
				627	}
				628	zSig0 &= ~ roundMask;
				629	return packFloatx80( zSign, zExp, zSig0 );
				630	}
				631	}
				632	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				633	zSig0 += roundIncrement;
				634	if ( zSig0 < roundIncrement ) {
				635	++zExp;
				636	zSig0 = LIT64( 0x8000000000000000 );
				637	}
				638	roundIncrement = roundMask + 1;
				639	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				640	roundMask \|= roundIncrement;
				641	}
				642	zSig0 &= ~ roundMask;
				643	if ( zSig0 == 0 ) zExp = 0;
				644	return packFloatx80( zSign, zExp, zSig0 );
				645	precision80:
				646	increment = ( (sbits64) zSig1 < 0 );
				647	if ( ! roundNearestEven ) {
				648	if ( roundingMode == float_round_to_zero ) {
				649	increment = 0;
				650	}
				651	else {
				652	if ( zSign ) {
				653	increment = ( roundingMode == float_round_down ) && zSig1;
				654	}
				655	else {
				656	increment = ( roundingMode == float_round_up ) && zSig1;
				657	}
				658	}
				659	}
				660	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				661	if ( ( 0x7FFE < zExp )
				662	\|\| ( ( zExp == 0x7FFE )
				663	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				664	&& increment
				665	)
				666	) {
				667	roundMask = 0;
				668	overflow:
				669	float_raise( float_flag_overflow \| float_flag_inexact );
				670	if ( ( roundingMode == float_round_to_zero )
				671	\|\| ( zSign && ( roundingMode == float_round_up ) )
				672	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				673	) {
				674	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				675	}
				676	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				677	}
				678	if ( zExp <= 0 ) {
				679	isTiny =
				680	( float_detect_tininess == float_tininess_before_rounding )
				681	\|\| ( zExp < 0 )
				682	\|\| ! increment
				683	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				684	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				685	zExp = 0;
				686	if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
				687	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				688	if ( roundNearestEven ) {
				689	increment = ( (sbits64) zSig1 < 0 );
				690	}
				691	else {
				692	if ( zSign ) {
				693	increment = ( roundingMode == float_round_down ) && zSig1;
				694	}
				695	else {
				696	increment = ( roundingMode == float_round_up ) && zSig1;
				697	}
				698	}
				699	if ( increment ) {
				700	++zSig0;
				701	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				702	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				703	}
				704	return packFloatx80( zSign, zExp, zSig0 );
				705	}
				706	}
				707	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				708	if ( increment ) {
				709	++zSig0;
				710	if ( zSig0 == 0 ) {
				711	++zExp;
				712	zSig0 = LIT64( 0x8000000000000000 );
				713	}
				714	else {
				715	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				716	}
				717	}
				718	else {
				719	if ( zSig0 == 0 ) zExp = 0;
				720	}
				721
				722	return packFloatx80( zSign, zExp, zSig0 );
				723	}
				724
				725	/*
				726	-------------------------------------------------------------------------------
				727	Takes an abstract floating-point value having sign `zSign', exponent
				728	`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				729	and returns the proper extended double-precision floating-point value
				730	corresponding to the abstract input. This routine is just like
				731	`roundAndPackFloatx80' except that the input significand does not have to be
				732	normalized.
				733	-------------------------------------------------------------------------------
				734	*/
				735	static floatx80
				736	normalizeRoundAndPackFloatx80(
				737	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				738	)
				739	{
				740	int8 shiftCount;
				741
				742	if ( zSig0 == 0 ) {
				743	zSig0 = zSig1;
				744	zSig1 = 0;
				745	zExp -= 64;
				746	}
				747	shiftCount = countLeadingZeros64( zSig0 );
				748	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				749	zExp -= shiftCount;
				750	return
				751	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
				752
				753	}
				754
				755	#endif
				756
				757	/*
				758	-------------------------------------------------------------------------------
				759	Returns the result of converting the 32-bit two's complement integer `a' to
				760	the single-precision floating-point format. The conversion is performed
				761	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				762	-------------------------------------------------------------------------------
				763	*/
				764	float32 int32_to_float32( int32 a )
				765	{
				766	flag zSign;
				767
				768	if ( a == 0 ) return 0;
				769	if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				770	zSign = ( a < 0 );
				771	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
				772
				773	}
				774
				775	/*
				776	-------------------------------------------------------------------------------
				777	Returns the result of converting the 32-bit two's complement integer `a' to
				778	the double-precision floating-point format. The conversion is performed
				779	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				780	-------------------------------------------------------------------------------
				781	*/
				782	float64 int32_to_float64( int32 a )
				783	{
				784	flag aSign;
				785	uint32 absA;
				786	int8 shiftCount;
				787	bits64 zSig;
				788
				789	if ( a == 0 ) return 0;
				790	aSign = ( a < 0 );
				791	absA = aSign ? - a : a;
				792	shiftCount = countLeadingZeros32( absA ) + 21;
				793	zSig = absA;
				794	return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
				795
				796	}
				797
				798	#ifdef FLOATX80
				799
				800	/*
				801	-------------------------------------------------------------------------------
				802	Returns the result of converting the 32-bit two's complement integer `a'
				803	to the extended double-precision floating-point format. The conversion
				804	is performed according to the IEC/IEEE Standard for Binary Floating-point
				805	Arithmetic.
				806	-------------------------------------------------------------------------------
				807	*/
				808	floatx80 int32_to_floatx80( int32 a )
				809	{
				810	flag zSign;
				811	uint32 absA;
				812	int8 shiftCount;
				813	bits64 zSig;
				814
				815	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				816	zSign = ( a < 0 );
				817	absA = zSign ? - a : a;
				818	shiftCount = countLeadingZeros32( absA ) + 32;
				819	zSig = absA;
				820	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				821
				822	}
				823
				824	#endif
				825
				826	/*
				827	-------------------------------------------------------------------------------
				828	Returns the result of converting the single-precision floating-point value
				829	`a' to the 32-bit two's complement integer format. The conversion is
				830	performed according to the IEC/IEEE Standard for Binary Floating-point
				831	Arithmetic---which means in particular that the conversion is rounded
				832	according to the current rounding mode. If `a' is a NaN, the largest
				833	positive integer is returned. Otherwise, if the conversion overflows, the
				834	largest integer with the same sign as `a' is returned.
				835	-------------------------------------------------------------------------------
				836	*/
				837	int32 float32_to_int32( float32 a )
				838	{
				839	flag aSign;
				840	int16 aExp, shiftCount;
				841	bits32 aSig;
				842	bits64 zSig;
				843
				844	aSig = extractFloat32Frac( a );
				845	aExp = extractFloat32Exp( a );
				846	aSign = extractFloat32Sign( a );
				847	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				848	if ( aExp ) aSig \|= 0x00800000;
				849	shiftCount = 0xAF - aExp;
				850	zSig = aSig;
				851	zSig <<= 32;
				852	if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
				853	return roundAndPackInt32( aSign, zSig );
				854
				855	}
				856
				857	/*
				858	-------------------------------------------------------------------------------
				859	Returns the result of converting the single-precision floating-point value
				860	`a' to the 32-bit two's complement integer format. The conversion is
				861	performed according to the IEC/IEEE Standard for Binary Floating-point
				862	Arithmetic, except that the conversion is always rounded toward zero. If
				863	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				864	conversion overflows, the largest integer with the same sign as `a' is
				865	returned.
				866	-------------------------------------------------------------------------------
				867	*/
				868	int32 float32_to_int32_round_to_zero( float32 a )
				869	{
				870	flag aSign;
				871	int16 aExp, shiftCount;
				872	bits32 aSig;
				873	int32 z;
				874
				875	aSig = extractFloat32Frac( a );
				876	aExp = extractFloat32Exp( a );
				877	aSign = extractFloat32Sign( a );
				878	shiftCount = aExp - 0x9E;
				879	if ( 0 <= shiftCount ) {
				880	if ( a == 0xCF000000 ) return 0x80000000;
				881	float_raise( float_flag_invalid );
				882	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				883	return 0x80000000;
				884	}
				885	else if ( aExp <= 0x7E ) {
				886	if ( aExp \| aSig ) float_exception_flags \|= float_flag_inexact;
				887	return 0;
				888	}
				889	aSig = ( aSig \| 0x00800000 )<<8;
				890	z = aSig>>( - shiftCount );
				891	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				892	float_exception_flags \|= float_flag_inexact;
				893	}
				894	return aSign ? - z : z;
				895
				896	}
				897
				898	/*
				899	-------------------------------------------------------------------------------
				900	Returns the result of converting the single-precision floating-point value
				901	`a' to the double-precision floating-point format. The conversion is
				902	performed according to the IEC/IEEE Standard for Binary Floating-point
				903	Arithmetic.
				904	-------------------------------------------------------------------------------
				905	*/
				906	float64 float32_to_float64( float32 a )
				907	{
				908	flag aSign;
				909	int16 aExp;
				910	bits32 aSig;
				911
				912	aSig = extractFloat32Frac( a );
				913	aExp = extractFloat32Exp( a );
				914	aSign = extractFloat32Sign( a );
				915	if ( aExp == 0xFF ) {
				916	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
				917	return packFloat64( aSign, 0x7FF, 0 );
				918	}
				919	if ( aExp == 0 ) {
				920	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				921	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				922	--aExp;
				923	}
				924	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				925
				926	}
				927
				928	#ifdef FLOATX80
				929
				930	/*
				931	-------------------------------------------------------------------------------
				932	Returns the result of converting the single-precision floating-point value
				933	`a' to the extended double-precision floating-point format. The conversion
				934	is performed according to the IEC/IEEE Standard for Binary Floating-point
				935	Arithmetic.
				936	-------------------------------------------------------------------------------
				937	*/
				938	floatx80 float32_to_floatx80( float32 a )
				939	{
				940	flag aSign;
				941	int16 aExp;
				942	bits32 aSig;
				943
				944	aSig = extractFloat32Frac( a );
				945	aExp = extractFloat32Exp( a );
				946	aSign = extractFloat32Sign( a );
				947	if ( aExp == 0xFF ) {
				948	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
				949	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				950	}
				951	if ( aExp == 0 ) {
				952	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				953	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				954	}
				955	aSig \|= 0x00800000;
				956	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				957
				958	}
				959
				960	#endif
				961
				962	/*
				963	-------------------------------------------------------------------------------
				964	Rounds the single-precision floating-point value `a' to an integer, and
				965	returns the result as a single-precision floating-point value. The
				966	operation is performed according to the IEC/IEEE Standard for Binary
				967	Floating-point Arithmetic.
				968	-------------------------------------------------------------------------------
				969	*/
				970	float32 float32_round_to_int( float32 a )
				971	{
				972	flag aSign;
				973	int16 aExp;
				974	bits32 lastBitMask, roundBitsMask;
				975	int8 roundingMode;
				976	float32 z;
				977
				978	aExp = extractFloat32Exp( a );
				979	if ( 0x96 <= aExp ) {
				980	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				981	return propagateFloat32NaN( a, a );
				982	}
				983	return a;
				984	}
				985	if ( aExp <= 0x7E ) {
				986	if ( (bits32) ( a<<1 ) == 0 ) return a;
				987	float_exception_flags \|= float_flag_inexact;
				988	aSign = extractFloat32Sign( a );
				989	switch ( float_rounding_mode ) {
				990	case float_round_nearest_even:
				991	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				992	return packFloat32( aSign, 0x7F, 0 );
				993	}
				994	break;
				995	case float_round_down:
				996	return aSign ? 0xBF800000 : 0;
				997	case float_round_up:
				998	return aSign ? 0x80000000 : 0x3F800000;
				999	}
				1000	return packFloat32( aSign, 0, 0 );
				1001	}
				1002	lastBitMask = 1;
				1003	lastBitMask <<= 0x96 - aExp;
				1004	roundBitsMask = lastBitMask - 1;
				1005	z = a;
				1006	roundingMode = float_rounding_mode;
				1007	if ( roundingMode == float_round_nearest_even ) {
				1008	z += lastBitMask>>1;
				1009	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1010	}
				1011	else if ( roundingMode != float_round_to_zero ) {
				1012	if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1013	z += roundBitsMask;
				1014	}
				1015	}
				1016	z &= ~ roundBitsMask;
				1017	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1018	return z;
				1019
				1020	}
				1021
				1022	/*
				1023	-------------------------------------------------------------------------------
				1024	Returns the result of adding the absolute values of the single-precision
				1025	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1026	before being returned. `zSign' is ignored if the result is a NaN. The
				1027	addition is performed according to the IEC/IEEE Standard for Binary
				1028	Floating-point Arithmetic.
				1029	-------------------------------------------------------------------------------
				1030	*/
				1031	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
				1032	{
				1033	int16 aExp, bExp, zExp;
				1034	bits32 aSig, bSig, zSig;
				1035	int16 expDiff;
				1036
				1037	aSig = extractFloat32Frac( a );
				1038	aExp = extractFloat32Exp( a );
				1039	bSig = extractFloat32Frac( b );
				1040	bExp = extractFloat32Exp( b );
				1041	expDiff = aExp - bExp;
				1042	aSig <<= 6;
				1043	bSig <<= 6;
				1044	if ( 0 < expDiff ) {
				1045	if ( aExp == 0xFF ) {
				1046	if ( aSig ) return propagateFloat32NaN( a, b );
				1047	return a;
				1048	}
				1049	if ( bExp == 0 ) {
				1050	--expDiff;
				1051	}
				1052	else {
				1053	bSig \|= 0x20000000;
				1054	}
				1055	shift32RightJamming( bSig, expDiff, &bSig );
				1056	zExp = aExp;
				1057	}
				1058	else if ( expDiff < 0 ) {
				1059	if ( bExp == 0xFF ) {
				1060	if ( bSig ) return propagateFloat32NaN( a, b );
				1061	return packFloat32( zSign, 0xFF, 0 );
				1062	}
				1063	if ( aExp == 0 ) {
				1064	++expDiff;
				1065	}
				1066	else {
				1067	aSig \|= 0x20000000;
				1068	}
				1069	shift32RightJamming( aSig, - expDiff, &aSig );
				1070	zExp = bExp;
				1071	}
				1072	else {
				1073	if ( aExp == 0xFF ) {
				1074	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1075	return a;
				1076	}
				1077	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1078	zSig = 0x40000000 + aSig + bSig;
				1079	zExp = aExp;
				1080	goto roundAndPack;
				1081	}
				1082	aSig \|= 0x20000000;
				1083	zSig = ( aSig + bSig )<<1;
				1084	--zExp;
				1085	if ( (sbits32) zSig < 0 ) {
				1086	zSig = aSig + bSig;
				1087	++zExp;
				1088	}
				1089	roundAndPack:
				1090	return roundAndPackFloat32( zSign, zExp, zSig );
				1091
				1092	}
				1093
				1094	/*
				1095	-------------------------------------------------------------------------------
				1096	Returns the result of subtracting the absolute values of the single-
				1097	precision floating-point values `a' and `b'. If `zSign' is true, the
				1098	difference is negated before being returned. `zSign' is ignored if the
				1099	result is a NaN. The subtraction is performed according to the IEC/IEEE
				1100	Standard for Binary Floating-point Arithmetic.
				1101	-------------------------------------------------------------------------------
				1102	*/
				1103	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
				1104	{
				1105	int16 aExp, bExp, zExp;
				1106	bits32 aSig, bSig, zSig;
				1107	int16 expDiff;
				1108
				1109	aSig = extractFloat32Frac( a );
				1110	aExp = extractFloat32Exp( a );
				1111	bSig = extractFloat32Frac( b );
				1112	bExp = extractFloat32Exp( b );
				1113	expDiff = aExp - bExp;
				1114	aSig <<= 7;
				1115	bSig <<= 7;
				1116	if ( 0 < expDiff ) goto aExpBigger;
				1117	if ( expDiff < 0 ) goto bExpBigger;
				1118	if ( aExp == 0xFF ) {
				1119	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1120	float_raise( float_flag_invalid );
				1121	return float32_default_nan;
				1122	}
				1123	if ( aExp == 0 ) {
				1124	aExp = 1;
				1125	bExp = 1;
				1126	}
				1127	if ( bSig < aSig ) goto aBigger;
				1128	if ( aSig < bSig ) goto bBigger;
				1129	return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
				1130	bExpBigger:
				1131	if ( bExp == 0xFF ) {
				1132	if ( bSig ) return propagateFloat32NaN( a, b );
				1133	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1134	}
				1135	if ( aExp == 0 ) {
				1136	++expDiff;
				1137	}
				1138	else {
				1139	aSig \|= 0x40000000;
				1140	}
				1141	shift32RightJamming( aSig, - expDiff, &aSig );
				1142	bSig \|= 0x40000000;
				1143	bBigger:
				1144	zSig = bSig - aSig;
				1145	zExp = bExp;
				1146	zSign ^= 1;
				1147	goto normalizeRoundAndPack;
				1148	aExpBigger:
				1149	if ( aExp == 0xFF ) {
				1150	if ( aSig ) return propagateFloat32NaN( a, b );
				1151	return a;
				1152	}
				1153	if ( bExp == 0 ) {
				1154	--expDiff;
				1155	}
				1156	else {
				1157	bSig \|= 0x40000000;
				1158	}
				1159	shift32RightJamming( bSig, expDiff, &bSig );
				1160	aSig \|= 0x40000000;
				1161	aBigger:
				1162	zSig = aSig - bSig;
				1163	zExp = aExp;
				1164	normalizeRoundAndPack:
				1165	--zExp;
				1166	return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
				1167
				1168	}
				1169
				1170	/*
				1171	-------------------------------------------------------------------------------
				1172	Returns the result of adding the single-precision floating-point values `a'
				1173	and `b'. The operation is performed according to the IEC/IEEE Standard for
				1174	Binary Floating-point Arithmetic.
				1175	-------------------------------------------------------------------------------
				1176	*/
				1177	float32 float32_add( float32 a, float32 b )
				1178	{
				1179	flag aSign, bSign;
				1180
				1181	aSign = extractFloat32Sign( a );
				1182	bSign = extractFloat32Sign( b );
				1183	if ( aSign == bSign ) {
				1184	return addFloat32Sigs( a, b, aSign );
				1185	}
				1186	else {
				1187	return subFloat32Sigs( a, b, aSign );
				1188	}
				1189
				1190	}
				1191
				1192	/*
				1193	-------------------------------------------------------------------------------
				1194	Returns the result of subtracting the single-precision floating-point values
				1195	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1196	for Binary Floating-point Arithmetic.
				1197	-------------------------------------------------------------------------------
				1198	*/
				1199	float32 float32_sub( float32 a, float32 b )
				1200	{
				1201	flag aSign, bSign;
				1202
				1203	aSign = extractFloat32Sign( a );
				1204	bSign = extractFloat32Sign( b );
				1205	if ( aSign == bSign ) {
				1206	return subFloat32Sigs( a, b, aSign );
				1207	}
				1208	else {
				1209	return addFloat32Sigs( a, b, aSign );
				1210	}
				1211
				1212	}
				1213
				1214	/*
				1215	-------------------------------------------------------------------------------
				1216	Returns the result of multiplying the single-precision floating-point values
				1217	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1218	for Binary Floating-point Arithmetic.
				1219	-------------------------------------------------------------------------------
				1220	*/
				1221	float32 float32_mul( float32 a, float32 b )
				1222	{
				1223	flag aSign, bSign, zSign;
				1224	int16 aExp, bExp, zExp;
				1225	bits32 aSig, bSig;
				1226	bits64 zSig64;
				1227	bits32 zSig;
				1228
				1229	aSig = extractFloat32Frac( a );
				1230	aExp = extractFloat32Exp( a );
				1231	aSign = extractFloat32Sign( a );
				1232	bSig = extractFloat32Frac( b );
				1233	bExp = extractFloat32Exp( b );
				1234	bSign = extractFloat32Sign( b );
				1235	zSign = aSign ^ bSign;
				1236	if ( aExp == 0xFF ) {
				1237	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1238	return propagateFloat32NaN( a, b );
				1239	}
				1240	if ( ( bExp \| bSig ) == 0 ) {
				1241	float_raise( float_flag_invalid );
				1242	return float32_default_nan;
				1243	}
				1244	return packFloat32( zSign, 0xFF, 0 );
				1245	}
				1246	if ( bExp == 0xFF ) {
				1247	if ( bSig ) return propagateFloat32NaN( a, b );
				1248	if ( ( aExp \| aSig ) == 0 ) {
				1249	float_raise( float_flag_invalid );
				1250	return float32_default_nan;
				1251	}
				1252	return packFloat32( zSign, 0xFF, 0 );
				1253	}
				1254	if ( aExp == 0 ) {
				1255	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1256	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1257	}
				1258	if ( bExp == 0 ) {
				1259	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1260	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1261	}
				1262	zExp = aExp + bExp - 0x7F;
				1263	aSig = ( aSig \| 0x00800000 )<<7;
				1264	bSig = ( bSig \| 0x00800000 )<<8;
				1265	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1266	zSig = zSig64;
				1267	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1268	zSig <<= 1;
				1269	--zExp;
				1270	}
				1271	return roundAndPackFloat32( zSign, zExp, zSig );
				1272
				1273	}
				1274
				1275	/*
				1276	-------------------------------------------------------------------------------
				1277	Returns the result of dividing the single-precision floating-point value `a'
				1278	by the corresponding value `b'. The operation is performed according to the
				1279	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1280	-------------------------------------------------------------------------------
				1281	*/
				1282	float32 float32_div( float32 a, float32 b )
				1283	{
				1284	flag aSign, bSign, zSign;
				1285	int16 aExp, bExp, zExp;
				1286	bits32 aSig, bSig, zSig;
				1287
				1288	aSig = extractFloat32Frac( a );
				1289	aExp = extractFloat32Exp( a );
				1290	aSign = extractFloat32Sign( a );
				1291	bSig = extractFloat32Frac( b );
				1292	bExp = extractFloat32Exp( b );
				1293	bSign = extractFloat32Sign( b );
				1294	zSign = aSign ^ bSign;
				1295	if ( aExp == 0xFF ) {
				1296	if ( aSig ) return propagateFloat32NaN( a, b );
				1297	if ( bExp == 0xFF ) {
				1298	if ( bSig ) return propagateFloat32NaN( a, b );
				1299	float_raise( float_flag_invalid );
				1300	return float32_default_nan;
				1301	}
				1302	return packFloat32( zSign, 0xFF, 0 );
				1303	}
				1304	if ( bExp == 0xFF ) {
				1305	if ( bSig ) return propagateFloat32NaN( a, b );
				1306	return packFloat32( zSign, 0, 0 );
				1307	}
				1308	if ( bExp == 0 ) {
				1309	if ( bSig == 0 ) {
				1310	if ( ( aExp \| aSig ) == 0 ) {
				1311	float_raise( float_flag_invalid );
				1312	return float32_default_nan;
				1313	}
				1314	float_raise( float_flag_divbyzero );
				1315	return packFloat32( zSign, 0xFF, 0 );
				1316	}
				1317	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1318	}
				1319	if ( aExp == 0 ) {
				1320	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1321	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1322	}
				1323	zExp = aExp - bExp + 0x7D;
				1324	aSig = ( aSig \| 0x00800000 )<<7;
				1325	bSig = ( bSig \| 0x00800000 )<<8;
				1326	if ( bSig <= ( aSig + aSig ) ) {
				1327	aSig >>= 1;
				1328	++zExp;
				1329	}
				1330	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
				1331	if ( ( zSig & 0x3F ) == 0 ) {
				1332	zSig \|= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
				1333	}
				1334	return roundAndPackFloat32( zSign, zExp, zSig );
				1335
				1336	}
				1337
				1338	/*
				1339	-------------------------------------------------------------------------------
				1340	Returns the remainder of the single-precision floating-point value `a'
				1341	with respect to the corresponding value `b'. The operation is performed
				1342	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1343	-------------------------------------------------------------------------------
				1344	*/
				1345	float32 float32_rem( float32 a, float32 b )
				1346	{
				1347	flag aSign, bSign, zSign;
				1348	int16 aExp, bExp, expDiff;
				1349	bits32 aSig, bSig;
				1350	bits32 q;
				1351	bits64 aSig64, bSig64, q64;
				1352	bits32 alternateASig;
				1353	sbits32 sigMean;
				1354
				1355	aSig = extractFloat32Frac( a );
				1356	aExp = extractFloat32Exp( a );
				1357	aSign = extractFloat32Sign( a );
				1358	bSig = extractFloat32Frac( b );
				1359	bExp = extractFloat32Exp( b );
				1360	bSign = extractFloat32Sign( b );
				1361	if ( aExp == 0xFF ) {
				1362	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1363	return propagateFloat32NaN( a, b );
				1364	}
				1365	float_raise( float_flag_invalid );
				1366	return float32_default_nan;
				1367	}
				1368	if ( bExp == 0xFF ) {
				1369	if ( bSig ) return propagateFloat32NaN( a, b );
				1370	return a;
				1371	}
				1372	if ( bExp == 0 ) {
				1373	if ( bSig == 0 ) {
				1374	float_raise( float_flag_invalid );
				1375	return float32_default_nan;
				1376	}
				1377	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1378	}
				1379	if ( aExp == 0 ) {
				1380	if ( aSig == 0 ) return a;
				1381	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1382	}
				1383	expDiff = aExp - bExp;
				1384	aSig \|= 0x00800000;
				1385	bSig \|= 0x00800000;
				1386	if ( expDiff < 32 ) {
				1387	aSig <<= 8;
				1388	bSig <<= 8;
				1389	if ( expDiff < 0 ) {
				1390	if ( expDiff < -1 ) return a;
				1391	aSig >>= 1;
				1392	}
				1393	q = ( bSig <= aSig );
				1394	if ( q ) aSig -= bSig;
				1395	if ( 0 < expDiff ) {
				1396	q = ( ( (bits64) aSig )<<32 ) / bSig;
				1397	q >>= 32 - expDiff;
				1398	bSig >>= 2;
				1399	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1400	}
				1401	else {
				1402	aSig >>= 2;
				1403	bSig >>= 2;
				1404	}
				1405	}
				1406	else {
				1407	if ( bSig <= aSig ) aSig -= bSig;
				1408	aSig64 = ( (bits64) aSig )<<40;
				1409	bSig64 = ( (bits64) bSig )<<40;
				1410	expDiff -= 64;
				1411	while ( 0 < expDiff ) {
				1412	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1413	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1414	aSig64 = - ( ( bSig * q64 )<<38 );
				1415	expDiff -= 62;
				1416	}
				1417	expDiff += 64;
				1418	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1419	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1420	q = q64>>( 64 - expDiff );
				1421	bSig <<= 6;
				1422	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1423	}
				1424	do {
				1425	alternateASig = aSig;
				1426	++q;
				1427	aSig -= bSig;
				1428	} while ( 0 <= (sbits32) aSig );
				1429	sigMean = aSig + alternateASig;
				1430	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1431	aSig = alternateASig;
				1432	}
				1433	zSign = ( (sbits32) aSig < 0 );
				1434	if ( zSign ) aSig = - aSig;
				1435	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
				1436
				1437	}
				1438
				1439	/*
				1440	-------------------------------------------------------------------------------
				1441	Returns the square root of the single-precision floating-point value `a'.
				1442	The operation is performed according to the IEC/IEEE Standard for Binary
				1443	Floating-point Arithmetic.
				1444	-------------------------------------------------------------------------------
				1445	*/
				1446	float32 float32_sqrt( float32 a )
				1447	{
				1448	flag aSign;
				1449	int16 aExp, zExp;
				1450	bits32 aSig, zSig;
				1451	bits64 rem, term;
				1452
				1453	aSig = extractFloat32Frac( a );
				1454	aExp = extractFloat32Exp( a );
				1455	aSign = extractFloat32Sign( a );
				1456	if ( aExp == 0xFF ) {
				1457	if ( aSig ) return propagateFloat32NaN( a, 0 );
				1458	if ( ! aSign ) return a;
				1459	float_raise( float_flag_invalid );
				1460	return float32_default_nan;
				1461	}
				1462	if ( aSign ) {
				1463	if ( ( aExp \| aSig ) == 0 ) return a;
				1464	float_raise( float_flag_invalid );
				1465	return float32_default_nan;
				1466	}
				1467	if ( aExp == 0 ) {
				1468	if ( aSig == 0 ) return 0;
				1469	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1470	}
				1471	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				1472	aSig = ( aSig \| 0x00800000 )<<8;
				1473	zSig = estimateSqrt32( aExp, aSig ) + 2;
				1474	if ( ( zSig & 0x7F ) <= 5 ) {
				1475	if ( zSig < 2 ) {
				1476	zSig = 0xFFFFFFFF;
				1477	}
				1478	else {
				1479	aSig >>= aExp & 1;
				1480	term = ( (bits64) zSig ) * zSig;
				1481	rem = ( ( (bits64) aSig )<<32 ) - term;
				1482	while ( (sbits64) rem < 0 ) {
				1483	--zSig;
				1484	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				1485	}
				1486	zSig \|= ( rem != 0 );
				1487	}
				1488	}
				1489	shift32RightJamming( zSig, 1, &zSig );
				1490	return roundAndPackFloat32( 0, zExp, zSig );
				1491
				1492	}
				1493
				1494	/*
				1495	-------------------------------------------------------------------------------
				1496	Returns 1 if the single-precision floating-point value `a' is equal to the
				1497	corresponding value `b', and 0 otherwise. The comparison is performed
				1498	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1499	-------------------------------------------------------------------------------
				1500	*/
				1501	flag float32_eq( float32 a, float32 b )
				1502	{
				1503
				1504	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1505	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1506	) {
				1507	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1508	float_raise( float_flag_invalid );
				1509	}
				1510	return 0;
				1511	}
				1512	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1513
				1514	}
				1515
				1516	/*
				1517	-------------------------------------------------------------------------------
				1518	Returns 1 if the single-precision floating-point value `a' is less than or
				1519	equal to the corresponding value `b', and 0 otherwise. The comparison is
				1520	performed according to the IEC/IEEE Standard for Binary Floating-point
				1521	Arithmetic.
				1522	-------------------------------------------------------------------------------
				1523	*/
				1524	flag float32_le( float32 a, float32 b )
				1525	{
				1526	flag aSign, bSign;
				1527
				1528	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1529	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1530	) {
				1531	float_raise( float_flag_invalid );
				1532	return 0;
				1533	}
				1534	aSign = extractFloat32Sign( a );
				1535	bSign = extractFloat32Sign( b );
				1536	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1537	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1538
				1539	}
				1540
				1541	/*
				1542	-------------------------------------------------------------------------------
				1543	Returns 1 if the single-precision floating-point value `a' is less than
				1544	the corresponding value `b', and 0 otherwise. The comparison is performed
				1545	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1546	-------------------------------------------------------------------------------
				1547	*/
				1548	flag float32_lt( float32 a, float32 b )
				1549	{
				1550	flag aSign, bSign;
				1551
				1552	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1553	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1554	) {
				1555	float_raise( float_flag_invalid );
				1556	return 0;
				1557	}
				1558	aSign = extractFloat32Sign( a );
				1559	bSign = extractFloat32Sign( b );
				1560	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1561	return ( a != b ) && ( aSign ^ ( a < b ) );
				1562
				1563	}
				1564
				1565	/*
				1566	-------------------------------------------------------------------------------
				1567	Returns 1 if the single-precision floating-point value `a' is equal to the
				1568	corresponding value `b', and 0 otherwise. The invalid exception is raised
				1569	if either operand is a NaN. Otherwise, the comparison is performed
				1570	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1571	-------------------------------------------------------------------------------
				1572	*/
				1573	flag float32_eq_signaling( float32 a, float32 b )
				1574	{
				1575
				1576	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1577	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1578	) {
				1579	float_raise( float_flag_invalid );
				1580	return 0;
				1581	}
				1582	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1583
				1584	}
				1585
				1586	/*
				1587	-------------------------------------------------------------------------------
				1588	Returns 1 if the single-precision floating-point value `a' is less than or
				1589	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				1590	cause an exception. Otherwise, the comparison is performed according to the
				1591	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1592	-------------------------------------------------------------------------------
				1593	*/
				1594	flag float32_le_quiet( float32 a, float32 b )
				1595	{
				1596	flag aSign, bSign;
				1597	//int16 aExp, bExp;
				1598
				1599	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1600	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1601	) {
				1602	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1603	float_raise( float_flag_invalid );
				1604	}
				1605	return 0;
				1606	}
				1607	aSign = extractFloat32Sign( a );
				1608	bSign = extractFloat32Sign( b );
				1609	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1610	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1611
				1612	}
				1613
				1614	/*
				1615	-------------------------------------------------------------------------------
				1616	Returns 1 if the single-precision floating-point value `a' is less than
				1617	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				1618	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				1619	Standard for Binary Floating-point Arithmetic.
				1620	-------------------------------------------------------------------------------
				1621	*/
				1622	flag float32_lt_quiet( float32 a, float32 b )
				1623	{
				1624	flag aSign, bSign;
				1625
				1626	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1627	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1628	) {
				1629	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1630	float_raise( float_flag_invalid );
				1631	}
				1632	return 0;
				1633	}
				1634	aSign = extractFloat32Sign( a );
				1635	bSign = extractFloat32Sign( b );
				1636	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1637	return ( a != b ) && ( aSign ^ ( a < b ) );
				1638
				1639	}
				1640
				1641	/*
				1642	-------------------------------------------------------------------------------
				1643	Returns the result of converting the double-precision floating-point value
				1644	`a' to the 32-bit two's complement integer format. The conversion is
				1645	performed according to the IEC/IEEE Standard for Binary Floating-point
				1646	Arithmetic---which means in particular that the conversion is rounded
				1647	according to the current rounding mode. If `a' is a NaN, the largest
				1648	positive integer is returned. Otherwise, if the conversion overflows, the
				1649	largest integer with the same sign as `a' is returned.
				1650	-------------------------------------------------------------------------------
				1651	*/
				1652	int32 float64_to_int32( float64 a )
				1653	{
				1654	flag aSign;
				1655	int16 aExp, shiftCount;
				1656	bits64 aSig;
				1657
				1658	aSig = extractFloat64Frac( a );
				1659	aExp = extractFloat64Exp( a );
				1660	aSign = extractFloat64Sign( a );
				1661	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1662	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1663	shiftCount = 0x42C - aExp;
				1664	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1665	return roundAndPackInt32( aSign, aSig );
				1666
				1667	}
				1668
				1669	/*
				1670	-------------------------------------------------------------------------------
				1671	Returns the result of converting the double-precision floating-point value
				1672	`a' to the 32-bit two's complement integer format. The conversion is
				1673	performed according to the IEC/IEEE Standard for Binary Floating-point
				1674	Arithmetic, except that the conversion is always rounded toward zero. If
				1675	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1676	conversion overflows, the largest integer with the same sign as `a' is
				1677	returned.
				1678	-------------------------------------------------------------------------------
				1679	*/
				1680	int32 float64_to_int32_round_to_zero( float64 a )
				1681	{
				1682	flag aSign;
				1683	int16 aExp, shiftCount;
				1684	bits64 aSig, savedASig;
				1685	int32 z;
				1686
				1687	aSig = extractFloat64Frac( a );
				1688	aExp = extractFloat64Exp( a );
				1689	aSign = extractFloat64Sign( a );
				1690	shiftCount = 0x433 - aExp;
				1691	if ( shiftCount < 21 ) {
				1692	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1693	goto invalid;
				1694	}
				1695	else if ( 52 < shiftCount ) {
				1696	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1697	return 0;
				1698	}
				1699	aSig \|= LIT64( 0x0010000000000000 );
				1700	savedASig = aSig;
				1701	aSig >>= shiftCount;
				1702	z = aSig;
				1703	if ( aSign ) z = - z;
				1704	if ( ( z < 0 ) ^ aSign ) {
				1705	invalid:
				1706	float_exception_flags \|= float_flag_invalid;
				1707	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1708	}
				1709	if ( ( aSig<<shiftCount ) != savedASig ) {
				1710	float_exception_flags \|= float_flag_inexact;
				1711	}
				1712	return z;
				1713
				1714	}
				1715
				1716	/*
				1717	-------------------------------------------------------------------------------
				1718	Returns the result of converting the double-precision floating-point value
				1719	`a' to the 32-bit two's complement unsigned integer format. The conversion
				1720	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1721	Arithmetic---which means in particular that the conversion is rounded
				1722	according to the current rounding mode. If `a' is a NaN, the largest
				1723	positive integer is returned. Otherwise, if the conversion overflows, the
				1724	largest positive integer is returned.
				1725	-------------------------------------------------------------------------------
				1726	*/
				1727	int32 float64_to_uint32( float64 a )
				1728	{
				1729	flag aSign;
				1730	int16 aExp, shiftCount;
				1731	bits64 aSig;
				1732
				1733	aSig = extractFloat64Frac( a );
				1734	aExp = extractFloat64Exp( a );
				1735	aSign = 0; //extractFloat64Sign( a );
				1736	//if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1737	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1738	shiftCount = 0x42C - aExp;
				1739	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1740	return roundAndPackInt32( aSign, aSig );
				1741	}
				1742
				1743	/*
				1744	-------------------------------------------------------------------------------
				1745	Returns the result of converting the double-precision floating-point value
				1746	`a' to the 32-bit two's complement integer format. The conversion is
				1747	performed according to the IEC/IEEE Standard for Binary Floating-point
				1748	Arithmetic, except that the conversion is always rounded toward zero. If
				1749	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1750	conversion overflows, the largest positive integer is returned.
				1751	-------------------------------------------------------------------------------
				1752	*/
				1753	int32 float64_to_uint32_round_to_zero( float64 a )
				1754	{
				1755	flag aSign;
				1756	int16 aExp, shiftCount;
				1757	bits64 aSig, savedASig;
				1758	int32 z;
				1759
				1760	aSig = extractFloat64Frac( a );
				1761	aExp = extractFloat64Exp( a );
				1762	aSign = extractFloat64Sign( a );
				1763	shiftCount = 0x433 - aExp;
				1764	if ( shiftCount < 21 ) {
				1765	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1766	goto invalid;
				1767	}
				1768	else if ( 52 < shiftCount ) {
				1769	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1770	return 0;
				1771	}
				1772	aSig \|= LIT64( 0x0010000000000000 );
				1773	savedASig = aSig;
				1774	aSig >>= shiftCount;
				1775	z = aSig;
				1776	if ( aSign ) z = - z;
				1777	if ( ( z < 0 ) ^ aSign ) {
				1778	invalid:
				1779	float_exception_flags \|= float_flag_invalid;
				1780	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1781	}
				1782	if ( ( aSig<<shiftCount ) != savedASig ) {
				1783	float_exception_flags \|= float_flag_inexact;
				1784	}
				1785	return z;
				1786	}
				1787
				1788	/*
				1789	-------------------------------------------------------------------------------
				1790	Returns the result of converting the double-precision floating-point value
				1791	`a' to the single-precision floating-point format. The conversion is
				1792	performed according to the IEC/IEEE Standard for Binary Floating-point
				1793	Arithmetic.
				1794	-------------------------------------------------------------------------------
				1795	*/
				1796	float32 float64_to_float32( float64 a )
				1797	{
				1798	flag aSign;
				1799	int16 aExp;
				1800	bits64 aSig;
				1801	bits32 zSig;
				1802
				1803	aSig = extractFloat64Frac( a );
				1804	aExp = extractFloat64Exp( a );
				1805	aSign = extractFloat64Sign( a );
				1806	if ( aExp == 0x7FF ) {
				1807	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
				1808	return packFloat32( aSign, 0xFF, 0 );
				1809	}
				1810	shift64RightJamming( aSig, 22, &aSig );
				1811	zSig = aSig;
				1812	if ( aExp \|\| zSig ) {
				1813	zSig \|= 0x40000000;
				1814	aExp -= 0x381;
				1815	}
				1816	return roundAndPackFloat32( aSign, aExp, zSig );
				1817
				1818	}
				1819
				1820	#ifdef FLOATX80
				1821
				1822	/*
				1823	-------------------------------------------------------------------------------
				1824	Returns the result of converting the double-precision floating-point value
				1825	`a' to the extended double-precision floating-point format. The conversion
				1826	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1827	Arithmetic.
				1828	-------------------------------------------------------------------------------
				1829	*/
				1830	floatx80 float64_to_floatx80( float64 a )
				1831	{
				1832	flag aSign;
				1833	int16 aExp;
				1834	bits64 aSig;
				1835
				1836	aSig = extractFloat64Frac( a );
				1837	aExp = extractFloat64Exp( a );
				1838	aSign = extractFloat64Sign( a );
				1839	if ( aExp == 0x7FF ) {
				1840	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
				1841	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1842	}
				1843	if ( aExp == 0 ) {
				1844	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1845	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				1846	}
				1847	return
				1848	packFloatx80(
				1849	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				1850
				1851	}
				1852
				1853	#endif
				1854
				1855	/*
				1856	-------------------------------------------------------------------------------
				1857	Rounds the double-precision floating-point value `a' to an integer, and
				1858	returns the result as a double-precision floating-point value. The
				1859	operation is performed according to the IEC/IEEE Standard for Binary
				1860	Floating-point Arithmetic.
				1861	-------------------------------------------------------------------------------
				1862	*/
				1863	float64 float64_round_to_int( float64 a )
				1864	{
				1865	flag aSign;
				1866	int16 aExp;
				1867	bits64 lastBitMask, roundBitsMask;
				1868	int8 roundingMode;
				1869	float64 z;
				1870
				1871	aExp = extractFloat64Exp( a );
				1872	if ( 0x433 <= aExp ) {
				1873	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				1874	return propagateFloat64NaN( a, a );
				1875	}
				1876	return a;
				1877	}
				1878	if ( aExp <= 0x3FE ) {
				1879	if ( (bits64) ( a<<1 ) == 0 ) return a;
				1880	float_exception_flags \|= float_flag_inexact;
				1881	aSign = extractFloat64Sign( a );
				1882	switch ( float_rounding_mode ) {
				1883	case float_round_nearest_even:
				1884	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				1885	return packFloat64( aSign, 0x3FF, 0 );
				1886	}
				1887	break;
				1888	case float_round_down:
				1889	return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
				1890	case float_round_up:
				1891	return
				1892	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
				1893	}
				1894	return packFloat64( aSign, 0, 0 );
				1895	}
				1896	lastBitMask = 1;
				1897	lastBitMask <<= 0x433 - aExp;
				1898	roundBitsMask = lastBitMask - 1;
				1899	z = a;
				1900	roundingMode = float_rounding_mode;
				1901	if ( roundingMode == float_round_nearest_even ) {
				1902	z += lastBitMask>>1;
				1903	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1904	}
				1905	else if ( roundingMode != float_round_to_zero ) {
				1906	if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1907	z += roundBitsMask;
				1908	}
				1909	}
				1910	z &= ~ roundBitsMask;
				1911	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1912	return z;
				1913
				1914	}
				1915
				1916	/*
				1917	-------------------------------------------------------------------------------
				1918	Returns the result of adding the absolute values of the double-precision
				1919	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1920	before being returned. `zSign' is ignored if the result is a NaN. The
				1921	addition is performed according to the IEC/IEEE Standard for Binary
				1922	Floating-point Arithmetic.
				1923	-------------------------------------------------------------------------------
				1924	*/
				1925	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
				1926	{
				1927	int16 aExp, bExp, zExp;
				1928	bits64 aSig, bSig, zSig;
				1929	int16 expDiff;
				1930
				1931	aSig = extractFloat64Frac( a );
				1932	aExp = extractFloat64Exp( a );
				1933	bSig = extractFloat64Frac( b );
				1934	bExp = extractFloat64Exp( b );
				1935	expDiff = aExp - bExp;
				1936	aSig <<= 9;
				1937	bSig <<= 9;
				1938	if ( 0 < expDiff ) {
				1939	if ( aExp == 0x7FF ) {
				1940	if ( aSig ) return propagateFloat64NaN( a, b );
				1941	return a;
				1942	}
				1943	if ( bExp == 0 ) {
				1944	--expDiff;
				1945	}
				1946	else {
				1947	bSig \|= LIT64( 0x2000000000000000 );
				1948	}
				1949	shift64RightJamming( bSig, expDiff, &bSig );
				1950	zExp = aExp;
				1951	}
				1952	else if ( expDiff < 0 ) {
				1953	if ( bExp == 0x7FF ) {
				1954	if ( bSig ) return propagateFloat64NaN( a, b );
				1955	return packFloat64( zSign, 0x7FF, 0 );
				1956	}
				1957	if ( aExp == 0 ) {
				1958	++expDiff;
				1959	}
				1960	else {
				1961	aSig \|= LIT64( 0x2000000000000000 );
				1962	}
				1963	shift64RightJamming( aSig, - expDiff, &aSig );
				1964	zExp = bExp;
				1965	}
				1966	else {
				1967	if ( aExp == 0x7FF ) {
				1968	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				1969	return a;
				1970	}
				1971	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				1972	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				1973	zExp = aExp;
				1974	goto roundAndPack;
				1975	}
				1976	aSig \|= LIT64( 0x2000000000000000 );
				1977	zSig = ( aSig + bSig )<<1;
				1978	--zExp;
				1979	if ( (sbits64) zSig < 0 ) {
				1980	zSig = aSig + bSig;
				1981	++zExp;
				1982	}
				1983	roundAndPack:
				1984	return roundAndPackFloat64( zSign, zExp, zSig );
				1985
				1986	}
				1987
				1988	/*
				1989	-------------------------------------------------------------------------------
				1990	Returns the result of subtracting the absolute values of the double-
				1991	precision floating-point values `a' and `b'. If `zSign' is true, the
				1992	difference is negated before being returned. `zSign' is ignored if the
				1993	result is a NaN. The subtraction is performed according to the IEC/IEEE
				1994	Standard for Binary Floating-point Arithmetic.
				1995	-------------------------------------------------------------------------------
				1996	*/
				1997	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
				1998	{
				1999	int16 aExp, bExp, zExp;
				2000	bits64 aSig, bSig, zSig;
				2001	int16 expDiff;
				2002
				2003	aSig = extractFloat64Frac( a );
				2004	aExp = extractFloat64Exp( a );
				2005	bSig = extractFloat64Frac( b );
				2006	bExp = extractFloat64Exp( b );
				2007	expDiff = aExp - bExp;
				2008	aSig <<= 10;
				2009	bSig <<= 10;
				2010	if ( 0 < expDiff ) goto aExpBigger;
				2011	if ( expDiff < 0 ) goto bExpBigger;
				2012	if ( aExp == 0x7FF ) {
				2013	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				2014	float_raise( float_flag_invalid );
				2015	return float64_default_nan;
				2016	}
				2017	if ( aExp == 0 ) {
				2018	aExp = 1;
				2019	bExp = 1;
				2020	}
				2021	if ( bSig < aSig ) goto aBigger;
				2022	if ( aSig < bSig ) goto bBigger;
				2023	return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
				2024	bExpBigger:
				2025	if ( bExp == 0x7FF ) {
				2026	if ( bSig ) return propagateFloat64NaN( a, b );
				2027	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2028	}
				2029	if ( aExp == 0 ) {
				2030	++expDiff;
				2031	}
				2032	else {
				2033	aSig \|= LIT64( 0x4000000000000000 );
				2034	}
				2035	shift64RightJamming( aSig, - expDiff, &aSig );
				2036	bSig \|= LIT64( 0x4000000000000000 );
				2037	bBigger:
				2038	zSig = bSig - aSig;
				2039	zExp = bExp;
				2040	zSign ^= 1;
				2041	goto normalizeRoundAndPack;
				2042	aExpBigger:
				2043	if ( aExp == 0x7FF ) {
				2044	if ( aSig ) return propagateFloat64NaN( a, b );
				2045	return a;
				2046	}
				2047	if ( bExp == 0 ) {
				2048	--expDiff;
				2049	}
				2050	else {
				2051	bSig \|= LIT64( 0x4000000000000000 );
				2052	}
				2053	shift64RightJamming( bSig, expDiff, &bSig );
				2054	aSig \|= LIT64( 0x4000000000000000 );
				2055	aBigger:
				2056	zSig = aSig - bSig;
				2057	zExp = aExp;
				2058	normalizeRoundAndPack:
				2059	--zExp;
				2060	return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
				2061
				2062	}
				2063
				2064	/*
				2065	-------------------------------------------------------------------------------
				2066	Returns the result of adding the double-precision floating-point values `a'
				2067	and `b'. The operation is performed according to the IEC/IEEE Standard for
				2068	Binary Floating-point Arithmetic.
				2069	-------------------------------------------------------------------------------
				2070	*/
				2071	float64 float64_add( float64 a, float64 b )
				2072	{
				2073	flag aSign, bSign;
				2074
				2075	aSign = extractFloat64Sign( a );
				2076	bSign = extractFloat64Sign( b );
				2077	if ( aSign == bSign ) {
				2078	return addFloat64Sigs( a, b, aSign );
				2079	}
				2080	else {
				2081	return subFloat64Sigs( a, b, aSign );
				2082	}
				2083
				2084	}
				2085
				2086	/*
				2087	-------------------------------------------------------------------------------
				2088	Returns the result of subtracting the double-precision floating-point values
				2089	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2090	for Binary Floating-point Arithmetic.
				2091	-------------------------------------------------------------------------------
				2092	*/
				2093	float64 float64_sub( float64 a, float64 b )
				2094	{
				2095	flag aSign, bSign;
				2096
				2097	aSign = extractFloat64Sign( a );
				2098	bSign = extractFloat64Sign( b );
				2099	if ( aSign == bSign ) {
				2100	return subFloat64Sigs( a, b, aSign );
				2101	}
				2102	else {
				2103	return addFloat64Sigs( a, b, aSign );
				2104	}
				2105
				2106	}
				2107
				2108	/*
				2109	-------------------------------------------------------------------------------
				2110	Returns the result of multiplying the double-precision floating-point values
				2111	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2112	for Binary Floating-point Arithmetic.
				2113	-------------------------------------------------------------------------------
				2114	*/
				2115	float64 float64_mul( float64 a, float64 b )
				2116	{
				2117	flag aSign, bSign, zSign;
				2118	int16 aExp, bExp, zExp;
				2119	bits64 aSig, bSig, zSig0, zSig1;
				2120
				2121	aSig = extractFloat64Frac( a );
				2122	aExp = extractFloat64Exp( a );
				2123	aSign = extractFloat64Sign( a );
				2124	bSig = extractFloat64Frac( b );
				2125	bExp = extractFloat64Exp( b );
				2126	bSign = extractFloat64Sign( b );
				2127	zSign = aSign ^ bSign;
				2128	if ( aExp == 0x7FF ) {
				2129	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2130	return propagateFloat64NaN( a, b );
				2131	}
				2132	if ( ( bExp \| bSig ) == 0 ) {
				2133	float_raise( float_flag_invalid );
				2134	return float64_default_nan;
				2135	}
				2136	return packFloat64( zSign, 0x7FF, 0 );
				2137	}
				2138	if ( bExp == 0x7FF ) {
				2139	if ( bSig ) return propagateFloat64NaN( a, b );
				2140	if ( ( aExp \| aSig ) == 0 ) {
				2141	float_raise( float_flag_invalid );
				2142	return float64_default_nan;
				2143	}
				2144	return packFloat64( zSign, 0x7FF, 0 );
				2145	}
				2146	if ( aExp == 0 ) {
				2147	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2148	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2149	}
				2150	if ( bExp == 0 ) {
				2151	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2152	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2153	}
				2154	zExp = aExp + bExp - 0x3FF;
				2155	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2156	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2157	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2158	zSig0 \|= ( zSig1 != 0 );
				2159	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2160	zSig0 <<= 1;
				2161	--zExp;
				2162	}
				2163	return roundAndPackFloat64( zSign, zExp, zSig0 );
				2164
				2165	}
				2166
				2167	/*
				2168	-------------------------------------------------------------------------------
				2169	Returns the result of dividing the double-precision floating-point value `a'
				2170	by the corresponding value `b'. The operation is performed according to
				2171	the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2172	-------------------------------------------------------------------------------
				2173	*/
				2174	float64 float64_div( float64 a, float64 b )
				2175	{
				2176	flag aSign, bSign, zSign;
				2177	int16 aExp, bExp, zExp;
				2178	bits64 aSig, bSig, zSig;
				2179	bits64 rem0, rem1;
				2180	bits64 term0, term1;
				2181
				2182	aSig = extractFloat64Frac( a );
				2183	aExp = extractFloat64Exp( a );
				2184	aSign = extractFloat64Sign( a );
				2185	bSig = extractFloat64Frac( b );
				2186	bExp = extractFloat64Exp( b );
				2187	bSign = extractFloat64Sign( b );
				2188	zSign = aSign ^ bSign;
				2189	if ( aExp == 0x7FF ) {
				2190	if ( aSig ) return propagateFloat64NaN( a, b );
				2191	if ( bExp == 0x7FF ) {
				2192	if ( bSig ) return propagateFloat64NaN( a, b );
				2193	float_raise( float_flag_invalid );
				2194	return float64_default_nan;
				2195	}
				2196	return packFloat64( zSign, 0x7FF, 0 );
				2197	}
				2198	if ( bExp == 0x7FF ) {
				2199	if ( bSig ) return propagateFloat64NaN( a, b );
				2200	return packFloat64( zSign, 0, 0 );
				2201	}
				2202	if ( bExp == 0 ) {
				2203	if ( bSig == 0 ) {
				2204	if ( ( aExp \| aSig ) == 0 ) {
				2205	float_raise( float_flag_invalid );
				2206	return float64_default_nan;
				2207	}
				2208	float_raise( float_flag_divbyzero );
				2209	return packFloat64( zSign, 0x7FF, 0 );
				2210	}
				2211	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2212	}
				2213	if ( aExp == 0 ) {
				2214	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2215	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2216	}
				2217	zExp = aExp - bExp + 0x3FD;
				2218	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2219	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2220	if ( bSig <= ( aSig + aSig ) ) {
				2221	aSig >>= 1;
				2222	++zExp;
				2223	}
				2224	zSig = estimateDiv128To64( aSig, 0, bSig );
				2225	if ( ( zSig & 0x1FF ) <= 2 ) {
				2226	mul64To128( bSig, zSig, &term0, &term1 );
				2227	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2228	while ( (sbits64) rem0 < 0 ) {
				2229	--zSig;
				2230	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2231	}
				2232	zSig \|= ( rem1 != 0 );
				2233	}
				2234	return roundAndPackFloat64( zSign, zExp, zSig );
				2235
				2236	}
				2237
				2238	/*
				2239	-------------------------------------------------------------------------------
				2240	Returns the remainder of the double-precision floating-point value `a'
				2241	with respect to the corresponding value `b'. The operation is performed
				2242	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2243	-------------------------------------------------------------------------------
				2244	*/
				2245	float64 float64_rem( float64 a, float64 b )
				2246	{
				2247	flag aSign, bSign, zSign;
				2248	int16 aExp, bExp, expDiff;
				2249	bits64 aSig, bSig;
				2250	bits64 q, alternateASig;
				2251	sbits64 sigMean;
				2252
				2253	aSig = extractFloat64Frac( a );
				2254	aExp = extractFloat64Exp( a );
				2255	aSign = extractFloat64Sign( a );
				2256	bSig = extractFloat64Frac( b );
				2257	bExp = extractFloat64Exp( b );
				2258	bSign = extractFloat64Sign( b );
				2259	if ( aExp == 0x7FF ) {
				2260	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2261	return propagateFloat64NaN( a, b );
				2262	}
				2263	float_raise( float_flag_invalid );
				2264	return float64_default_nan;
				2265	}
				2266	if ( bExp == 0x7FF ) {
				2267	if ( bSig ) return propagateFloat64NaN( a, b );
				2268	return a;
				2269	}
				2270	if ( bExp == 0 ) {
				2271	if ( bSig == 0 ) {
				2272	float_raise( float_flag_invalid );
				2273	return float64_default_nan;
				2274	}
				2275	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2276	}
				2277	if ( aExp == 0 ) {
				2278	if ( aSig == 0 ) return a;
				2279	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2280	}
				2281	expDiff = aExp - bExp;
				2282	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2283	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2284	if ( expDiff < 0 ) {
				2285	if ( expDiff < -1 ) return a;
				2286	aSig >>= 1;
				2287	}
				2288	q = ( bSig <= aSig );
				2289	if ( q ) aSig -= bSig;
				2290	expDiff -= 64;
				2291	while ( 0 < expDiff ) {
				2292	q = estimateDiv128To64( aSig, 0, bSig );
				2293	q = ( 2 < q ) ? q - 2 : 0;
				2294	aSig = - ( ( bSig>>2 ) * q );
				2295	expDiff -= 62;
				2296	}
				2297	expDiff += 64;
				2298	if ( 0 < expDiff ) {
				2299	q = estimateDiv128To64( aSig, 0, bSig );
				2300	q = ( 2 < q ) ? q - 2 : 0;
				2301	q >>= 64 - expDiff;
				2302	bSig >>= 2;
				2303	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2304	}
				2305	else {
				2306	aSig >>= 2;
				2307	bSig >>= 2;
				2308	}
				2309	do {
				2310	alternateASig = aSig;
				2311	++q;
				2312	aSig -= bSig;
				2313	} while ( 0 <= (sbits64) aSig );
				2314	sigMean = aSig + alternateASig;
				2315	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2316	aSig = alternateASig;
				2317	}
				2318	zSign = ( (sbits64) aSig < 0 );
				2319	if ( zSign ) aSig = - aSig;
				2320	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
				2321
				2322	}
				2323
				2324	/*
				2325	-------------------------------------------------------------------------------
				2326	Returns the square root of the double-precision floating-point value `a'.
				2327	The operation is performed according to the IEC/IEEE Standard for Binary
				2328	Floating-point Arithmetic.
				2329	-------------------------------------------------------------------------------
				2330	*/
				2331	float64 float64_sqrt( float64 a )
				2332	{
				2333	flag aSign;
				2334	int16 aExp, zExp;
				2335	bits64 aSig, zSig;
				2336	bits64 rem0, rem1, term0, term1; //, shiftedRem;
				2337	//float64 z;
				2338
				2339	aSig = extractFloat64Frac( a );
				2340	aExp = extractFloat64Exp( a );
				2341	aSign = extractFloat64Sign( a );
				2342	if ( aExp == 0x7FF ) {
				2343	if ( aSig ) return propagateFloat64NaN( a, a );
				2344	if ( ! aSign ) return a;
				2345	float_raise( float_flag_invalid );
				2346	return float64_default_nan;
				2347	}
				2348	if ( aSign ) {
				2349	if ( ( aExp \| aSig ) == 0 ) return a;
				2350	float_raise( float_flag_invalid );
				2351	return float64_default_nan;
				2352	}
				2353	if ( aExp == 0 ) {
				2354	if ( aSig == 0 ) return 0;
				2355	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2356	}
				2357	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				2358	aSig \|= LIT64( 0x0010000000000000 );
				2359	zSig = estimateSqrt32( aExp, aSig>>21 );
				2360	zSig <<= 31;
				2361	aSig <<= 9 - ( aExp & 1 );
				2362	zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
				2363	if ( ( zSig & 0x3FF ) <= 5 ) {
				2364	if ( zSig < 2 ) {
				2365	zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
				2366	}
				2367	else {
				2368	aSig <<= 2;
				2369	mul64To128( zSig, zSig, &term0, &term1 );
				2370	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2371	while ( (sbits64) rem0 < 0 ) {
				2372	--zSig;
				2373	shortShift128Left( 0, zSig, 1, &term0, &term1 );
				2374	term1 \|= 1;
				2375	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				2376	}
				2377	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				2378	}
				2379	}
				2380	shift64RightJamming( zSig, 1, &zSig );
				2381	return roundAndPackFloat64( 0, zExp, zSig );
				2382
				2383	}
				2384
				2385	/*
				2386	-------------------------------------------------------------------------------
				2387	Returns 1 if the double-precision floating-point value `a' is equal to the
				2388	corresponding value `b', and 0 otherwise. The comparison is performed
				2389	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2390	-------------------------------------------------------------------------------
				2391	*/
				2392	flag float64_eq( float64 a, float64 b )
				2393	{
				2394
				2395	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2396	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2397	) {
				2398	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2399	float_raise( float_flag_invalid );
				2400	}
				2401	return 0;
				2402	}
				2403	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2404
				2405	}
				2406
				2407	/*
				2408	-------------------------------------------------------------------------------
				2409	Returns 1 if the double-precision floating-point value `a' is less than or
				2410	equal to the corresponding value `b', and 0 otherwise. The comparison is
				2411	performed according to the IEC/IEEE Standard for Binary Floating-point
				2412	Arithmetic.
				2413	-------------------------------------------------------------------------------
				2414	*/
				2415	flag float64_le( float64 a, float64 b )
				2416	{
				2417	flag aSign, bSign;
				2418
				2419	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2420	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2421	) {
				2422	float_raise( float_flag_invalid );
				2423	return 0;
				2424	}
				2425	aSign = extractFloat64Sign( a );
				2426	bSign = extractFloat64Sign( b );
				2427	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2428	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2429
				2430	}
				2431
				2432	/*
				2433	-------------------------------------------------------------------------------
				2434	Returns 1 if the double-precision floating-point value `a' is less than
				2435	the corresponding value `b', and 0 otherwise. The comparison is performed
				2436	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2437	-------------------------------------------------------------------------------
				2438	*/
				2439	flag float64_lt( float64 a, float64 b )
				2440	{
				2441	flag aSign, bSign;
				2442
				2443	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2444	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2445	) {
				2446	float_raise( float_flag_invalid );
				2447	return 0;
				2448	}
				2449	aSign = extractFloat64Sign( a );
				2450	bSign = extractFloat64Sign( b );
				2451	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2452	return ( a != b ) && ( aSign ^ ( a < b ) );
				2453
				2454	}
				2455
				2456	/*
				2457	-------------------------------------------------------------------------------
				2458	Returns 1 if the double-precision floating-point value `a' is equal to the
				2459	corresponding value `b', and 0 otherwise. The invalid exception is raised
				2460	if either operand is a NaN. Otherwise, the comparison is performed
				2461	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2462	-------------------------------------------------------------------------------
				2463	*/
				2464	flag float64_eq_signaling( float64 a, float64 b )
				2465	{
				2466
				2467	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2468	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2469	) {
				2470	float_raise( float_flag_invalid );
				2471	return 0;
				2472	}
				2473	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2474
				2475	}
				2476
				2477	/*
				2478	-------------------------------------------------------------------------------
				2479	Returns 1 if the double-precision floating-point value `a' is less than or
				2480	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2481	cause an exception. Otherwise, the comparison is performed according to the
				2482	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2483	-------------------------------------------------------------------------------
				2484	*/
				2485	flag float64_le_quiet( float64 a, float64 b )
				2486	{
				2487	flag aSign, bSign;
				2488	//int16 aExp, bExp;
				2489
				2490	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2491	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2492	) {
				2493	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2494	float_raise( float_flag_invalid );
				2495	}
				2496	return 0;
				2497	}
				2498	aSign = extractFloat64Sign( a );
				2499	bSign = extractFloat64Sign( b );
				2500	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2501	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2502
				2503	}
				2504
				2505	/*
				2506	-------------------------------------------------------------------------------
				2507	Returns 1 if the double-precision floating-point value `a' is less than
				2508	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2509	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2510	Standard for Binary Floating-point Arithmetic.
				2511	-------------------------------------------------------------------------------
				2512	*/
				2513	flag float64_lt_quiet( float64 a, float64 b )
				2514	{
				2515	flag aSign, bSign;
				2516
				2517	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2518	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2519	) {
				2520	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2521	float_raise( float_flag_invalid );
				2522	}
				2523	return 0;
				2524	}
				2525	aSign = extractFloat64Sign( a );
				2526	bSign = extractFloat64Sign( b );
				2527	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2528	return ( a != b ) && ( aSign ^ ( a < b ) );
				2529
				2530	}
				2531
				2532	#ifdef FLOATX80
				2533
				2534	/*
				2535	-------------------------------------------------------------------------------
				2536	Returns the result of converting the extended double-precision floating-
				2537	point value `a' to the 32-bit two's complement integer format. The
				2538	conversion is performed according to the IEC/IEEE Standard for Binary
				2539	Floating-point Arithmetic---which means in particular that the conversion
				2540	is rounded according to the current rounding mode. If `a' is a NaN, the
				2541	largest positive integer is returned. Otherwise, if the conversion
				2542	overflows, the largest integer with the same sign as `a' is returned.
				2543	-------------------------------------------------------------------------------
				2544	*/
				2545	int32 floatx80_to_int32( floatx80 a )
				2546	{
				2547	flag aSign;
				2548	int32 aExp, shiftCount;
				2549	bits64 aSig;
				2550
				2551	aSig = extractFloatx80Frac( a );
				2552	aExp = extractFloatx80Exp( a );
				2553	aSign = extractFloatx80Sign( a );
				2554	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2555	shiftCount = 0x4037 - aExp;
				2556	if ( shiftCount <= 0 ) shiftCount = 1;
				2557	shift64RightJamming( aSig, shiftCount, &aSig );
				2558	return roundAndPackInt32( aSign, aSig );
				2559
				2560	}
				2561
				2562	/*
				2563	-------------------------------------------------------------------------------
				2564	Returns the result of converting the extended double-precision floating-
				2565	point value `a' to the 32-bit two's complement integer format. The
				2566	conversion is performed according to the IEC/IEEE Standard for Binary
				2567	Floating-point Arithmetic, except that the conversion is always rounded
				2568	toward zero. If `a' is a NaN, the largest positive integer is returned.
				2569	Otherwise, if the conversion overflows, the largest integer with the same
				2570	sign as `a' is returned.
				2571	-------------------------------------------------------------------------------
				2572	*/
				2573	int32 floatx80_to_int32_round_to_zero( floatx80 a )
				2574	{
				2575	flag aSign;
				2576	int32 aExp, shiftCount;
				2577	bits64 aSig, savedASig;
				2578	int32 z;
				2579
				2580	aSig = extractFloatx80Frac( a );
				2581	aExp = extractFloatx80Exp( a );
				2582	aSign = extractFloatx80Sign( a );
				2583	shiftCount = 0x403E - aExp;
				2584	if ( shiftCount < 32 ) {
				2585	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2586	goto invalid;
				2587	}
				2588	else if ( 63 < shiftCount ) {
				2589	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				2590	return 0;
				2591	}
				2592	savedASig = aSig;
				2593	aSig >>= shiftCount;
				2594	z = aSig;
				2595	if ( aSign ) z = - z;
				2596	if ( ( z < 0 ) ^ aSign ) {
				2597	invalid:
				2598	float_exception_flags \|= float_flag_invalid;
				2599	return aSign ? 0x80000000 : 0x7FFFFFFF;
				2600	}
				2601	if ( ( aSig<<shiftCount ) != savedASig ) {
				2602	float_exception_flags \|= float_flag_inexact;
				2603	}
				2604	return z;
				2605
				2606	}
				2607
				2608	/*
				2609	-------------------------------------------------------------------------------
				2610	Returns the result of converting the extended double-precision floating-
				2611	point value `a' to the single-precision floating-point format. The
				2612	conversion is performed according to the IEC/IEEE Standard for Binary
				2613	Floating-point Arithmetic.
				2614	-------------------------------------------------------------------------------
				2615	*/
				2616	float32 floatx80_to_float32( floatx80 a )
				2617	{
				2618	flag aSign;
				2619	int32 aExp;
				2620	bits64 aSig;
				2621
				2622	aSig = extractFloatx80Frac( a );
				2623	aExp = extractFloatx80Exp( a );
				2624	aSign = extractFloatx80Sign( a );
				2625	if ( aExp == 0x7FFF ) {
				2626	if ( (bits64) ( aSig<<1 ) ) {
				2627	return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
				2628	}
				2629	return packFloat32( aSign, 0xFF, 0 );
				2630	}
				2631	shift64RightJamming( aSig, 33, &aSig );
				2632	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				2633	return roundAndPackFloat32( aSign, aExp, aSig );
				2634
				2635	}
				2636
				2637	/*
				2638	-------------------------------------------------------------------------------
				2639	Returns the result of converting the extended double-precision floating-
				2640	point value `a' to the double-precision floating-point format. The
				2641	conversion is performed according to the IEC/IEEE Standard for Binary
				2642	Floating-point Arithmetic.
				2643	-------------------------------------------------------------------------------
				2644	*/
				2645	float64 floatx80_to_float64( floatx80 a )
				2646	{
				2647	flag aSign;
				2648	int32 aExp;
				2649	bits64 aSig, zSig;
				2650
				2651	aSig = extractFloatx80Frac( a );
				2652	aExp = extractFloatx80Exp( a );
				2653	aSign = extractFloatx80Sign( a );
				2654	if ( aExp == 0x7FFF ) {
				2655	if ( (bits64) ( aSig<<1 ) ) {
				2656	return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
				2657	}
				2658	return packFloat64( aSign, 0x7FF, 0 );
				2659	}
				2660	shift64RightJamming( aSig, 1, &zSig );
				2661	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				2662	return roundAndPackFloat64( aSign, aExp, zSig );
				2663
				2664	}
				2665
				2666	/*
				2667	-------------------------------------------------------------------------------
				2668	Rounds the extended double-precision floating-point value `a' to an integer,
				2669	and returns the result as an extended double-precision floating-point
				2670	value. The operation is performed according to the IEC/IEEE Standard for
				2671	Binary Floating-point Arithmetic.
				2672	-------------------------------------------------------------------------------
				2673	*/
				2674	floatx80 floatx80_round_to_int( floatx80 a )
				2675	{
				2676	flag aSign;
				2677	int32 aExp;
				2678	bits64 lastBitMask, roundBitsMask;
				2679	int8 roundingMode;
				2680	floatx80 z;
				2681
				2682	aExp = extractFloatx80Exp( a );
				2683	if ( 0x403E <= aExp ) {
				2684	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				2685	return propagateFloatx80NaN( a, a );
				2686	}
				2687	return a;
				2688	}
				2689	if ( aExp <= 0x3FFE ) {
				2690	if ( ( aExp == 0 )
				2691	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				2692	return a;
				2693	}
				2694	float_exception_flags \|= float_flag_inexact;
				2695	aSign = extractFloatx80Sign( a );
				2696	switch ( float_rounding_mode ) {
				2697	case float_round_nearest_even:
				2698	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				2699	) {
				2700	return
				2701	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2702	}
				2703	break;
				2704	case float_round_down:
				2705	return
				2706	aSign ?
				2707	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				2708	: packFloatx80( 0, 0, 0 );
				2709	case float_round_up:
				2710	return
				2711	aSign ? packFloatx80( 1, 0, 0 )
				2712	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2713	}
				2714	return packFloatx80( aSign, 0, 0 );
				2715	}
				2716	lastBitMask = 1;
				2717	lastBitMask <<= 0x403E - aExp;
				2718	roundBitsMask = lastBitMask - 1;
				2719	z = a;
				2720	roundingMode = float_rounding_mode;
				2721	if ( roundingMode == float_round_nearest_even ) {
				2722	z.low += lastBitMask>>1;
				2723	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				2724	}
				2725	else if ( roundingMode != float_round_to_zero ) {
				2726	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				2727	z.low += roundBitsMask;
				2728	}
				2729	}
				2730	z.low &= ~ roundBitsMask;
				2731	if ( z.low == 0 ) {
				2732	++z.high;
				2733	z.low = LIT64( 0x8000000000000000 );
				2734	}
				2735	if ( z.low != a.low ) float_exception_flags \|= float_flag_inexact;
				2736	return z;
				2737
				2738	}
				2739
				2740	/*
				2741	-------------------------------------------------------------------------------
				2742	Returns the result of adding the absolute values of the extended double-
				2743	precision floating-point values `a' and `b'. If `zSign' is true, the sum is
				2744	negated before being returned. `zSign' is ignored if the result is a NaN.
				2745	The addition is performed according to the IEC/IEEE Standard for Binary
				2746	Floating-point Arithmetic.
				2747	-------------------------------------------------------------------------------
				2748	*/
				2749	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2750	{
				2751	int32 aExp, bExp, zExp;
				2752	bits64 aSig, bSig, zSig0, zSig1;
				2753	int32 expDiff;
				2754
				2755	aSig = extractFloatx80Frac( a );
				2756	aExp = extractFloatx80Exp( a );
				2757	bSig = extractFloatx80Frac( b );
				2758	bExp = extractFloatx80Exp( b );
				2759	expDiff = aExp - bExp;
				2760	if ( 0 < expDiff ) {
				2761	if ( aExp == 0x7FFF ) {
				2762	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2763	return a;
				2764	}
				2765	if ( bExp == 0 ) --expDiff;
				2766	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2767	zExp = aExp;
				2768	}
				2769	else if ( expDiff < 0 ) {
				2770	if ( bExp == 0x7FFF ) {
				2771	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2772	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2773	}
				2774	if ( aExp == 0 ) ++expDiff;
				2775	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2776	zExp = bExp;
				2777	}
				2778	else {
				2779	if ( aExp == 0x7FFF ) {
				2780	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2781	return propagateFloatx80NaN( a, b );
				2782	}
				2783	return a;
				2784	}
				2785	zSig1 = 0;
				2786	zSig0 = aSig + bSig;
				2787	if ( aExp == 0 ) {
				2788	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				2789	goto roundAndPack;
				2790	}
				2791	zExp = aExp;
				2792	goto shiftRight1;
				2793	}
				2794
				2795	zSig0 = aSig + bSig;
				2796
				2797	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				2798	shiftRight1:
				2799	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2800	zSig0 \|= LIT64( 0x8000000000000000 );
				2801	++zExp;
				2802	roundAndPack:
				2803	return
				2804	roundAndPackFloatx80(
				2805	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2806
				2807	}
				2808
				2809	/*
				2810	-------------------------------------------------------------------------------
				2811	Returns the result of subtracting the absolute values of the extended
				2812	double-precision floating-point values `a' and `b'. If `zSign' is true,
				2813	the difference is negated before being returned. `zSign' is ignored if the
				2814	result is a NaN. The subtraction is performed according to the IEC/IEEE
				2815	Standard for Binary Floating-point Arithmetic.
				2816	-------------------------------------------------------------------------------
				2817	*/
				2818	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2819	{
				2820	int32 aExp, bExp, zExp;
				2821	bits64 aSig, bSig, zSig0, zSig1;
				2822	int32 expDiff;
				2823	floatx80 z;
				2824
				2825	aSig = extractFloatx80Frac( a );
				2826	aExp = extractFloatx80Exp( a );
				2827	bSig = extractFloatx80Frac( b );
				2828	bExp = extractFloatx80Exp( b );
				2829	expDiff = aExp - bExp;
				2830	if ( 0 < expDiff ) goto aExpBigger;
				2831	if ( expDiff < 0 ) goto bExpBigger;
				2832	if ( aExp == 0x7FFF ) {
				2833	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2834	return propagateFloatx80NaN( a, b );
				2835	}
				2836	float_raise( float_flag_invalid );
				2837	z.low = floatx80_default_nan_low;
				2838	z.high = floatx80_default_nan_high;
				2839	return z;
				2840	}
				2841	if ( aExp == 0 ) {
				2842	aExp = 1;
				2843	bExp = 1;
				2844	}
				2845	zSig1 = 0;
				2846	if ( bSig < aSig ) goto aBigger;
				2847	if ( aSig < bSig ) goto bBigger;
				2848	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
				2849	bExpBigger:
				2850	if ( bExp == 0x7FFF ) {
				2851	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2852	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2853	}
				2854	if ( aExp == 0 ) ++expDiff;
				2855	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2856	bBigger:
				2857	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				2858	zExp = bExp;
				2859	zSign ^= 1;
				2860	goto normalizeRoundAndPack;
				2861	aExpBigger:
				2862	if ( aExp == 0x7FFF ) {
				2863	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2864	return a;
				2865	}
				2866	if ( bExp == 0 ) --expDiff;
				2867	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2868	aBigger:
				2869	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				2870	zExp = aExp;
				2871	normalizeRoundAndPack:
				2872	return
				2873	normalizeRoundAndPackFloatx80(
				2874	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2875
				2876	}
				2877
				2878	/*
				2879	-------------------------------------------------------------------------------
				2880	Returns the result of adding the extended double-precision floating-point
				2881	values `a' and `b'. The operation is performed according to the IEC/IEEE
				2882	Standard for Binary Floating-point Arithmetic.
				2883	-------------------------------------------------------------------------------
				2884	*/
				2885	floatx80 floatx80_add( floatx80 a, floatx80 b )
				2886	{
				2887	flag aSign, bSign;
				2888
				2889	aSign = extractFloatx80Sign( a );
				2890	bSign = extractFloatx80Sign( b );
				2891	if ( aSign == bSign ) {
				2892	return addFloatx80Sigs( a, b, aSign );
				2893	}
				2894	else {
				2895	return subFloatx80Sigs( a, b, aSign );
				2896	}
				2897
				2898	}
				2899
				2900	/*
				2901	-------------------------------------------------------------------------------
				2902	Returns the result of subtracting the extended double-precision floating-
				2903	point values `a' and `b'. The operation is performed according to the
				2904	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2905	-------------------------------------------------------------------------------
				2906	*/
				2907	floatx80 floatx80_sub( floatx80 a, floatx80 b )
				2908	{
				2909	flag aSign, bSign;
				2910
				2911	aSign = extractFloatx80Sign( a );
				2912	bSign = extractFloatx80Sign( b );
				2913	if ( aSign == bSign ) {
				2914	return subFloatx80Sigs( a, b, aSign );
				2915	}
				2916	else {
				2917	return addFloatx80Sigs( a, b, aSign );
				2918	}
				2919
				2920	}
				2921
				2922	/*
				2923	-------------------------------------------------------------------------------
				2924	Returns the result of multiplying the extended double-precision floating-
				2925	point values `a' and `b'. The operation is performed according to the
				2926	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2927	-------------------------------------------------------------------------------
				2928	*/
				2929	floatx80 floatx80_mul( floatx80 a, floatx80 b )
				2930	{
				2931	flag aSign, bSign, zSign;
				2932	int32 aExp, bExp, zExp;
				2933	bits64 aSig, bSig, zSig0, zSig1;
				2934	floatx80 z;
				2935
				2936	aSig = extractFloatx80Frac( a );
				2937	aExp = extractFloatx80Exp( a );
				2938	aSign = extractFloatx80Sign( a );
				2939	bSig = extractFloatx80Frac( b );
				2940	bExp = extractFloatx80Exp( b );
				2941	bSign = extractFloatx80Sign( b );
				2942	zSign = aSign ^ bSign;
				2943	if ( aExp == 0x7FFF ) {
				2944	if ( (bits64) ( aSig<<1 )
				2945	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				2946	return propagateFloatx80NaN( a, b );
				2947	}
				2948	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				2949	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2950	}
				2951	if ( bExp == 0x7FFF ) {
				2952	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2953	if ( ( aExp \| aSig ) == 0 ) {
				2954	invalid:
				2955	float_raise( float_flag_invalid );
				2956	z.low = floatx80_default_nan_low;
				2957	z.high = floatx80_default_nan_high;
				2958	return z;
				2959	}
				2960	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2961	}
				2962	if ( aExp == 0 ) {
				2963	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2964	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				2965	}
				2966	if ( bExp == 0 ) {
				2967	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2968	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				2969	}
				2970	zExp = aExp + bExp - 0x3FFE;
				2971	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2972	if ( 0 < (sbits64) zSig0 ) {
				2973	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2974	--zExp;
				2975	}
				2976	return
				2977	roundAndPackFloatx80(
				2978	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2979
				2980	}
				2981
				2982	/*
				2983	-------------------------------------------------------------------------------
				2984	Returns the result of dividing the extended double-precision floating-point
				2985	value `a' by the corresponding value `b'. The operation is performed
				2986	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2987	-------------------------------------------------------------------------------
				2988	*/
				2989	floatx80 floatx80_div( floatx80 a, floatx80 b )
				2990	{
				2991	flag aSign, bSign, zSign;
				2992	int32 aExp, bExp, zExp;
				2993	bits64 aSig, bSig, zSig0, zSig1;
				2994	bits64 rem0, rem1, rem2, term0, term1, term2;
				2995	floatx80 z;
				2996
				2997	aSig = extractFloatx80Frac( a );
				2998	aExp = extractFloatx80Exp( a );
				2999	aSign = extractFloatx80Sign( a );
				3000	bSig = extractFloatx80Frac( b );
				3001	bExp = extractFloatx80Exp( b );
				3002	bSign = extractFloatx80Sign( b );
				3003	zSign = aSign ^ bSign;
				3004	if ( aExp == 0x7FFF ) {
				3005	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3006	if ( bExp == 0x7FFF ) {
				3007	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3008	goto invalid;
				3009	}
				3010	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3011	}
				3012	if ( bExp == 0x7FFF ) {
				3013	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3014	return packFloatx80( zSign, 0, 0 );
				3015	}
				3016	if ( bExp == 0 ) {
				3017	if ( bSig == 0 ) {
				3018	if ( ( aExp \| aSig ) == 0 ) {
				3019	invalid:
				3020	float_raise( float_flag_invalid );
				3021	z.low = floatx80_default_nan_low;
				3022	z.high = floatx80_default_nan_high;
				3023	return z;
				3024	}
				3025	float_raise( float_flag_divbyzero );
				3026	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3027	}
				3028	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3029	}
				3030	if ( aExp == 0 ) {
				3031	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3032	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3033	}
				3034	zExp = aExp - bExp + 0x3FFE;
				3035	rem1 = 0;
				3036	if ( bSig <= aSig ) {
				3037	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3038	++zExp;
				3039	}
				3040	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3041	mul64To128( bSig, zSig0, &term0, &term1 );
				3042	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3043	while ( (sbits64) rem0 < 0 ) {
				3044	--zSig0;
				3045	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3046	}
				3047	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3048	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3049	mul64To128( bSig, zSig1, &term1, &term2 );
				3050	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3051	while ( (sbits64) rem1 < 0 ) {
				3052	--zSig1;
				3053	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3054	}
				3055	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3056	}
				3057	return
				3058	roundAndPackFloatx80(
				3059	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				3060
				3061	}
				3062
				3063	/*
				3064	-------------------------------------------------------------------------------
				3065	Returns the remainder of the extended double-precision floating-point value
				3066	`a' with respect to the corresponding value `b'. The operation is performed
				3067	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3068	-------------------------------------------------------------------------------
				3069	*/
				3070	floatx80 floatx80_rem( floatx80 a, floatx80 b )
				3071	{
				3072	flag aSign, bSign, zSign;
				3073	int32 aExp, bExp, expDiff;
				3074	bits64 aSig0, aSig1, bSig;
				3075	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3076	floatx80 z;
				3077
				3078	aSig0 = extractFloatx80Frac( a );
				3079	aExp = extractFloatx80Exp( a );
				3080	aSign = extractFloatx80Sign( a );
				3081	bSig = extractFloatx80Frac( b );
				3082	bExp = extractFloatx80Exp( b );
				3083	bSign = extractFloatx80Sign( b );
				3084	if ( aExp == 0x7FFF ) {
				3085	if ( (bits64) ( aSig0<<1 )
				3086	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3087	return propagateFloatx80NaN( a, b );
				3088	}
				3089	goto invalid;
				3090	}
				3091	if ( bExp == 0x7FFF ) {
				3092	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3093	return a;
				3094	}
				3095	if ( bExp == 0 ) {
				3096	if ( bSig == 0 ) {
				3097	invalid:
				3098	float_raise( float_flag_invalid );
				3099	z.low = floatx80_default_nan_low;
				3100	z.high = floatx80_default_nan_high;
				3101	return z;
				3102	}
				3103	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3104	}
				3105	if ( aExp == 0 ) {
				3106	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3107	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3108	}
				3109	bSig \|= LIT64( 0x8000000000000000 );
				3110	zSign = aSign;
				3111	expDiff = aExp - bExp;
				3112	aSig1 = 0;
				3113	if ( expDiff < 0 ) {
				3114	if ( expDiff < -1 ) return a;
				3115	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3116	expDiff = 0;
				3117	}
				3118	q = ( bSig <= aSig0 );
				3119	if ( q ) aSig0 -= bSig;
				3120	expDiff -= 64;
				3121	while ( 0 < expDiff ) {
				3122	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3123	q = ( 2 < q ) ? q - 2 : 0;
				3124	mul64To128( bSig, q, &term0, &term1 );
				3125	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3126	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3127	expDiff -= 62;
				3128	}
				3129	expDiff += 64;
				3130	if ( 0 < expDiff ) {
				3131	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3132	q = ( 2 < q ) ? q - 2 : 0;
				3133	q >>= 64 - expDiff;
				3134	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3135	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3136	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3137	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3138	++q;
				3139	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3140	}
				3141	}
				3142	else {
				3143	term1 = 0;
				3144	term0 = bSig;
				3145	}
				3146	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3147	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3148	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3149	&& ( q & 1 ) )
				3150	) {
				3151	aSig0 = alternateASig0;
				3152	aSig1 = alternateASig1;
				3153	zSign = ! zSign;
				3154	}
				3155	return
				3156	normalizeRoundAndPackFloatx80(
				3157	80, zSign, bExp + expDiff, aSig0, aSig1 );
				3158
				3159	}
				3160
				3161	/*
				3162	-------------------------------------------------------------------------------
				3163	Returns the square root of the extended double-precision floating-point
				3164	value `a'. The operation is performed according to the IEC/IEEE Standard
				3165	for Binary Floating-point Arithmetic.
				3166	-------------------------------------------------------------------------------
				3167	*/
				3168	floatx80 floatx80_sqrt( floatx80 a )
				3169	{
				3170	flag aSign;
				3171	int32 aExp, zExp;
				3172	bits64 aSig0, aSig1, zSig0, zSig1;
				3173	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3174	bits64 shiftedRem0, shiftedRem1;
				3175	floatx80 z;
				3176
				3177	aSig0 = extractFloatx80Frac( a );
				3178	aExp = extractFloatx80Exp( a );
				3179	aSign = extractFloatx80Sign( a );
				3180	if ( aExp == 0x7FFF ) {
				3181	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
				3182	if ( ! aSign ) return a;
				3183	goto invalid;
				3184	}
				3185	if ( aSign ) {
				3186	if ( ( aExp \| aSig0 ) == 0 ) return a;
				3187	invalid:
				3188	float_raise( float_flag_invalid );
				3189	z.low = floatx80_default_nan_low;
				3190	z.high = floatx80_default_nan_high;
				3191	return z;
				3192	}
				3193	if ( aExp == 0 ) {
				3194	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				3195	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3196	}
				3197	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				3198	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				3199	zSig0 <<= 31;
				3200	aSig1 = 0;
				3201	shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
				3202	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
				3203	if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
				3204	shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
				3205	mul64To128( zSig0, zSig0, &term0, &term1 );
				3206	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				3207	while ( (sbits64) rem0 < 0 ) {
				3208	--zSig0;
				3209	shortShift128Left( 0, zSig0, 1, &term0, &term1 );
				3210	term1 \|= 1;
				3211	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				3212	}
				3213	shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
				3214	zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
				3215	if ( (bits64) ( zSig1<<1 ) <= 10 ) {
				3216	if ( zSig1 == 0 ) zSig1 = 1;
				3217	mul64To128( zSig0, zSig1, &term1, &term2 );
				3218	shortShift128Left( term1, term2, 1, &term1, &term2 );
				3219	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3220	mul64To128( zSig1, zSig1, &term2, &term3 );
				3221	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				3222	while ( (sbits64) rem1 < 0 ) {
				3223	--zSig1;
				3224	shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
				3225	term3 \|= 1;
				3226	add192(
				3227	rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
				3228	}
				3229	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				3230	}
				3231	return
				3232	roundAndPackFloatx80(
				3233	floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
				3234
				3235	}
				3236
				3237	/*
				3238	-------------------------------------------------------------------------------
				3239	Returns 1 if the extended double-precision floating-point value `a' is
				3240	equal to the corresponding value `b', and 0 otherwise. The comparison is
				3241	performed according to the IEC/IEEE Standard for Binary Floating-point
				3242	Arithmetic.
				3243	-------------------------------------------------------------------------------
				3244	*/
				3245	flag floatx80_eq( floatx80 a, floatx80 b )
				3246	{
				3247
				3248	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3249	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3250	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3251	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3252	) {
				3253	if ( floatx80_is_signaling_nan( a )
				3254	\|\| floatx80_is_signaling_nan( b ) ) {
				3255	float_raise( float_flag_invalid );
				3256	}
				3257	return 0;
				3258	}
				3259	return
				3260	( a.low == b.low )
				3261	&& ( ( a.high == b.high )
				3262	\|\| ( ( a.low == 0 )
				3263	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3264	);
				3265
				3266	}
				3267
				3268	/*
				3269	-------------------------------------------------------------------------------
				3270	Returns 1 if the extended double-precision floating-point value `a' is
				3271	less than or equal to the corresponding value `b', and 0 otherwise. The
				3272	comparison is performed according to the IEC/IEEE Standard for Binary
				3273	Floating-point Arithmetic.
				3274	-------------------------------------------------------------------------------
				3275	*/
				3276	flag floatx80_le( floatx80 a, floatx80 b )
				3277	{
				3278	flag aSign, bSign;
				3279
				3280	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3281	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3282	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3283	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3284	) {
				3285	float_raise( float_flag_invalid );
				3286	return 0;
				3287	}
				3288	aSign = extractFloatx80Sign( a );
				3289	bSign = extractFloatx80Sign( b );
				3290	if ( aSign != bSign ) {
				3291	return
				3292	aSign
				3293	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3294	== 0 );
				3295	}
				3296	return
				3297	aSign ? le128( b.high, b.low, a.high, a.low )
				3298	: le128( a.high, a.low, b.high, b.low );
				3299
				3300	}
				3301
				3302	/*
				3303	-------------------------------------------------------------------------------
				3304	Returns 1 if the extended double-precision floating-point value `a' is
				3305	less than the corresponding value `b', and 0 otherwise. The comparison
				3306	is performed according to the IEC/IEEE Standard for Binary Floating-point
				3307	Arithmetic.
				3308	-------------------------------------------------------------------------------
				3309	*/
				3310	flag floatx80_lt( floatx80 a, floatx80 b )
				3311	{
				3312	flag aSign, bSign;
				3313
				3314	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3315	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3316	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3317	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3318	) {
				3319	float_raise( float_flag_invalid );
				3320	return 0;
				3321	}
				3322	aSign = extractFloatx80Sign( a );
				3323	bSign = extractFloatx80Sign( b );
				3324	if ( aSign != bSign ) {
				3325	return
				3326	aSign
				3327	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3328	!= 0 );
				3329	}
				3330	return
				3331	aSign ? lt128( b.high, b.low, a.high, a.low )
				3332	: lt128( a.high, a.low, b.high, b.low );
				3333
				3334	}
				3335
				3336	/*
				3337	-------------------------------------------------------------------------------
				3338	Returns 1 if the extended double-precision floating-point value `a' is equal
				3339	to the corresponding value `b', and 0 otherwise. The invalid exception is
				3340	raised if either operand is a NaN. Otherwise, the comparison is performed
				3341	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3342	-------------------------------------------------------------------------------
				3343	*/
				3344	flag floatx80_eq_signaling( floatx80 a, floatx80 b )
				3345	{
				3346
				3347	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3348	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3349	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3350	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3351	) {
				3352	float_raise( float_flag_invalid );
				3353	return 0;
				3354	}
				3355	return
				3356	( a.low == b.low )
				3357	&& ( ( a.high == b.high )
				3358	\|\| ( ( a.low == 0 )
				3359	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3360	);
				3361
				3362	}
				3363
				3364	/*
				3365	-------------------------------------------------------------------------------
				3366	Returns 1 if the extended double-precision floating-point value `a' is less
				3367	than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				3368	do not cause an exception. Otherwise, the comparison is performed according
				3369	to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3370	-------------------------------------------------------------------------------
				3371	*/
				3372	flag floatx80_le_quiet( floatx80 a, floatx80 b )
				3373	{
				3374	flag aSign, bSign;
				3375
				3376	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3377	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3378	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3379	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3380	) {
				3381	if ( floatx80_is_signaling_nan( a )
				3382	\|\| floatx80_is_signaling_nan( b ) ) {
				3383	float_raise( float_flag_invalid );
				3384	}
				3385	return 0;
				3386	}
				3387	aSign = extractFloatx80Sign( a );
				3388	bSign = extractFloatx80Sign( b );
				3389	if ( aSign != bSign ) {
				3390	return
				3391	aSign
				3392	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3393	== 0 );
				3394	}
				3395	return
				3396	aSign ? le128( b.high, b.low, a.high, a.low )
				3397	: le128( a.high, a.low, b.high, b.low );
				3398
				3399	}
				3400
				3401	/*
				3402	-------------------------------------------------------------------------------
				3403	Returns 1 if the extended double-precision floating-point value `a' is less
				3404	than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				3405	an exception. Otherwise, the comparison is performed according to the
				3406	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3407	-------------------------------------------------------------------------------
				3408	*/
				3409	flag floatx80_lt_quiet( floatx80 a, floatx80 b )
				3410	{
				3411	flag aSign, bSign;
				3412
				3413	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3414	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3415	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3416	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3417	) {
				3418	if ( floatx80_is_signaling_nan( a )
				3419	\|\| floatx80_is_signaling_nan( b ) ) {
				3420	float_raise( float_flag_invalid );
				3421	}
				3422	return 0;
				3423	}
				3424	aSign = extractFloatx80Sign( a );
				3425	bSign = extractFloatx80Sign( b );
				3426	if ( aSign != bSign ) {
				3427	return
				3428	aSign
				3429	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3430	!= 0 );
				3431	}
				3432	return
				3433	aSign ? lt128( b.high, b.low, a.high, a.low )
				3434	: lt128( a.high, a.low, b.high, b.low );
				3435
				3436	}
				3437
				3438	#endif
				3439