Blame - arch/arm/nwfpe/softfloat.c - kernel/msm-4.9

blob: 9d743ae29062b221acf4e74ce474e6560c3ae761 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	===============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point
				5	Arithmetic Package, Release 2.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				15	arithmetic/softfloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				18	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				19	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				20	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				21	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				22
				23	Derivative works are acceptable, even for commercial purposes, so long as
				24	(1) they include prominent notice that the work is derivative, and (2) they
				25	include prominent notice akin to these three paragraphs for those parts of
				26	this code that are retained.
				27
				28	===============================================================================
				29	*/
				30
				31	#include "fpa11.h"
				32	//#include "milieu.h"
				33	//#include "softfloat.h"
				34
				35	/*
				36	-------------------------------------------------------------------------------
				37	Floating-point rounding mode, extended double-precision rounding precision,
				38	and exception flags.
				39	-------------------------------------------------------------------------------
				40	*/
				41	int8 float_rounding_mode = float_round_nearest_even;
				42	int8 floatx80_rounding_precision = 80;
				43	int8 float_exception_flags;
				44
				45	/*
				46	-------------------------------------------------------------------------------
				47	Primitive arithmetic functions, including multi-word arithmetic, and
				48	division and square root approximations. (Can be specialized to target if
				49	desired.)
				50	-------------------------------------------------------------------------------
				51	*/
				52	#include "softfloat-macros"
				53
				54	/*
				55	-------------------------------------------------------------------------------
				56	Functions and definitions to determine: (1) whether tininess for underflow
				57	is detected before or after rounding by default, (2) what (if anything)
				58	happens when exceptions are raised, (3) how signaling NaNs are distinguished
				59	from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				60	are propagated from function inputs to output. These details are target-
				61	specific.
				62	-------------------------------------------------------------------------------
				63	*/
				64	#include "softfloat-specialize"
				65
				66	/*
				67	-------------------------------------------------------------------------------
				68	Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				69	and 7, and returns the properly rounded 32-bit integer corresponding to the
				70	input. If `zSign' is nonzero, the input is negated before being converted
				71	to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
				72	input is simply rounded to an integer, with the inexact exception raised if
				73	the input cannot be represented exactly as an integer. If the fixed-point
				74	input is too large, however, the invalid exception is raised and the largest
				75	positive or negative integer is returned.
				76	-------------------------------------------------------------------------------
				77	*/
				78	static int32 roundAndPackInt32( flag zSign, bits64 absZ )
				79	{
				80	int8 roundingMode;
				81	flag roundNearestEven;
				82	int8 roundIncrement, roundBits;
				83	int32 z;
				84
				85	roundingMode = float_rounding_mode;
				86	roundNearestEven = ( roundingMode == float_round_nearest_even );
				87	roundIncrement = 0x40;
				88	if ( ! roundNearestEven ) {
				89	if ( roundingMode == float_round_to_zero ) {
				90	roundIncrement = 0;
				91	}
				92	else {
				93	roundIncrement = 0x7F;
				94	if ( zSign ) {
				95	if ( roundingMode == float_round_up ) roundIncrement = 0;
				96	}
				97	else {
				98	if ( roundingMode == float_round_down ) roundIncrement = 0;
				99	}
				100	}
				101	}
				102	roundBits = absZ & 0x7F;
				103	absZ = ( absZ + roundIncrement )>>7;
				104	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				105	z = absZ;
				106	if ( zSign ) z = - z;
				107	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				108	float_exception_flags \|= float_flag_invalid;
				109	return zSign ? 0x80000000 : 0x7FFFFFFF;
				110	}
				111	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				112	return z;
				113
				114	}
				115
				116	/*
				117	-------------------------------------------------------------------------------
				118	Returns the fraction bits of the single-precision floating-point value `a'.
				119	-------------------------------------------------------------------------------
				120	*/
				121	INLINE bits32 extractFloat32Frac( float32 a )
				122	{
				123
				124	return a & 0x007FFFFF;
				125
				126	}
				127
				128	/*
				129	-------------------------------------------------------------------------------
				130	Returns the exponent bits of the single-precision floating-point value `a'.
				131	-------------------------------------------------------------------------------
				132	*/
				133	INLINE int16 extractFloat32Exp( float32 a )
				134	{
				135
				136	return ( a>>23 ) & 0xFF;
				137
				138	}
				139
				140	/*
				141	-------------------------------------------------------------------------------
				142	Returns the sign bit of the single-precision floating-point value `a'.
				143	-------------------------------------------------------------------------------
				144	*/
				145	#if 0 /* in softfloat.h */
				146	INLINE flag extractFloat32Sign( float32 a )
				147	{
				148
				149	return a>>31;
				150
				151	}
				152	#endif
				153
				154	/*
				155	-------------------------------------------------------------------------------
				156	Normalizes the subnormal single-precision floating-point value represented
				157	by the denormalized significand `aSig'. The normalized exponent and
				158	significand are stored at the locations pointed to by `zExpPtr' and
				159	`zSigPtr', respectively.
				160	-------------------------------------------------------------------------------
				161	*/
				162	static void
				163	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				164	{
				165	int8 shiftCount;
				166
				167	shiftCount = countLeadingZeros32( aSig ) - 8;
				168	*zSigPtr = aSig<<shiftCount;
				169	*zExpPtr = 1 - shiftCount;
				170
				171	}
				172
				173	/*
				174	-------------------------------------------------------------------------------
				175	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				176	single-precision floating-point value, returning the result. After being
				177	shifted into the proper positions, the three fields are simply added
				178	together to form the result. This means that any integer portion of `zSig'
				179	will be added into the exponent. Since a properly normalized significand
				180	will have an integer portion equal to 1, the `zExp' input should be 1 less
				181	than the desired result exponent whenever `zSig' is a complete, normalized
				182	significand.
				183	-------------------------------------------------------------------------------
				184	*/
				185	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				186	{
				187	#if 0
				188	float32 f;
				189	__asm__("@ packFloat32 \n\
				190	mov %0, %1, asl #31 \n\
				191	orr %0, %2, asl #23 \n\
				192	orr %0, %3"
				193	: /* no outputs */
				194	: "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
				195	: "cc");
				196	return f;
				197	#else
				198	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
				199	#endif
				200	}
				201
				202	/*
				203	-------------------------------------------------------------------------------
				204	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				205	and significand `zSig', and returns the proper single-precision floating-
				206	point value corresponding to the abstract input. Ordinarily, the abstract
				207	value is simply rounded and packed into the single-precision format, with
				208	the inexact exception raised if the abstract input cannot be represented
				209	exactly. If the abstract value is too large, however, the overflow and
				210	inexact exceptions are raised and an infinity or maximal finite value is
				211	returned. If the abstract value is too small, the input value is rounded to
				212	a subnormal number, and the underflow and inexact exceptions are raised if
				213	the abstract input cannot be represented exactly as a subnormal single-
				214	precision floating-point number.
				215	The input significand `zSig' has its binary point between bits 30
				216	and 29, which is 7 bits to the left of the usual location. This shifted
				217	significand must be normalized or smaller. If `zSig' is not normalized,
				218	`zExp' must be 0; in that case, the result returned is a subnormal number,
				219	and it must not require rounding. In the usual case that `zSig' is
				220	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				221	The handling of underflow and overflow follows the IEC/IEEE Standard for
				222	Binary Floating-point Arithmetic.
				223	-------------------------------------------------------------------------------
				224	*/
				225	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				226	{
				227	int8 roundingMode;
				228	flag roundNearestEven;
				229	int8 roundIncrement, roundBits;
				230	flag isTiny;
				231
				232	roundingMode = float_rounding_mode;
				233	roundNearestEven = ( roundingMode == float_round_nearest_even );
				234	roundIncrement = 0x40;
				235	if ( ! roundNearestEven ) {
				236	if ( roundingMode == float_round_to_zero ) {
				237	roundIncrement = 0;
				238	}
				239	else {
				240	roundIncrement = 0x7F;
				241	if ( zSign ) {
				242	if ( roundingMode == float_round_up ) roundIncrement = 0;
				243	}
				244	else {
				245	if ( roundingMode == float_round_down ) roundIncrement = 0;
				246	}
				247	}
				248	}
				249	roundBits = zSig & 0x7F;
				250	if ( 0xFD <= (bits16) zExp ) {
				251	if ( ( 0xFD < zExp )
				252	\|\| ( ( zExp == 0xFD )
				253	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				254	) {
				255	float_raise( float_flag_overflow \| float_flag_inexact );
				256	return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
				257	}
				258	if ( zExp < 0 ) {
				259	isTiny =
				260	( float_detect_tininess == float_tininess_before_rounding )
				261	\|\| ( zExp < -1 )
				262	\|\| ( zSig + roundIncrement < 0x80000000 );
				263	shift32RightJamming( zSig, - zExp, &zSig );
				264	zExp = 0;
				265	roundBits = zSig & 0x7F;
				266	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				267	}
				268	}
				269	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				270	zSig = ( zSig + roundIncrement )>>7;
				271	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				272	if ( zSig == 0 ) zExp = 0;
				273	return packFloat32( zSign, zExp, zSig );
				274
				275	}
				276
				277	/*
				278	-------------------------------------------------------------------------------
				279	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				280	and significand `zSig', and returns the proper single-precision floating-
				281	point value corresponding to the abstract input. This routine is just like
				282	`roundAndPackFloat32' except that `zSig' does not have to be normalized in
				283	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				284	point exponent.
				285	-------------------------------------------------------------------------------
				286	*/
				287	static float32
				288	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
				289	{
				290	int8 shiftCount;
				291
				292	shiftCount = countLeadingZeros32( zSig ) - 1;
				293	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
				294
				295	}
				296
				297	/*
				298	-------------------------------------------------------------------------------
				299	Returns the fraction bits of the double-precision floating-point value `a'.
				300	-------------------------------------------------------------------------------
				301	*/
				302	INLINE bits64 extractFloat64Frac( float64 a )
				303	{
				304
				305	return a & LIT64( 0x000FFFFFFFFFFFFF );
				306
				307	}
				308
				309	/*
				310	-------------------------------------------------------------------------------
				311	Returns the exponent bits of the double-precision floating-point value `a'.
				312	-------------------------------------------------------------------------------
				313	*/
				314	INLINE int16 extractFloat64Exp( float64 a )
				315	{
				316
				317	return ( a>>52 ) & 0x7FF;
				318
				319	}
				320
				321	/*
				322	-------------------------------------------------------------------------------
				323	Returns the sign bit of the double-precision floating-point value `a'.
				324	-------------------------------------------------------------------------------
				325	*/
				326	#if 0 /* in softfloat.h */
				327	INLINE flag extractFloat64Sign( float64 a )
				328	{
				329
				330	return a>>63;
				331
				332	}
				333	#endif
				334
				335	/*
				336	-------------------------------------------------------------------------------
				337	Normalizes the subnormal double-precision floating-point value represented
				338	by the denormalized significand `aSig'. The normalized exponent and
				339	significand are stored at the locations pointed to by `zExpPtr' and
				340	`zSigPtr', respectively.
				341	-------------------------------------------------------------------------------
				342	*/
				343	static void
				344	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				345	{
				346	int8 shiftCount;
				347
				348	shiftCount = countLeadingZeros64( aSig ) - 11;
				349	*zSigPtr = aSig<<shiftCount;
				350	*zExpPtr = 1 - shiftCount;
				351
				352	}
				353
				354	/*
				355	-------------------------------------------------------------------------------
				356	Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				357	double-precision floating-point value, returning the result. After being
				358	shifted into the proper positions, the three fields are simply added
				359	together to form the result. This means that any integer portion of `zSig'
				360	will be added into the exponent. Since a properly normalized significand
				361	will have an integer portion equal to 1, the `zExp' input should be 1 less
				362	than the desired result exponent whenever `zSig' is a complete, normalized
				363	significand.
				364	-------------------------------------------------------------------------------
				365	*/
				366	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				367	{
				368
				369	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
				370
				371	}
				372
				373	/*
				374	-------------------------------------------------------------------------------
				375	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				376	and significand `zSig', and returns the proper double-precision floating-
				377	point value corresponding to the abstract input. Ordinarily, the abstract
				378	value is simply rounded and packed into the double-precision format, with
				379	the inexact exception raised if the abstract input cannot be represented
				380	exactly. If the abstract value is too large, however, the overflow and
				381	inexact exceptions are raised and an infinity or maximal finite value is
				382	returned. If the abstract value is too small, the input value is rounded to
				383	a subnormal number, and the underflow and inexact exceptions are raised if
				384	the abstract input cannot be represented exactly as a subnormal double-
				385	precision floating-point number.
				386	The input significand `zSig' has its binary point between bits 62
				387	and 61, which is 10 bits to the left of the usual location. This shifted
				388	significand must be normalized or smaller. If `zSig' is not normalized,
				389	`zExp' must be 0; in that case, the result returned is a subnormal number,
				390	and it must not require rounding. In the usual case that `zSig' is
				391	normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				392	The handling of underflow and overflow follows the IEC/IEEE Standard for
				393	Binary Floating-point Arithmetic.
				394	-------------------------------------------------------------------------------
				395	*/
				396	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				397	{
				398	int8 roundingMode;
				399	flag roundNearestEven;
				400	int16 roundIncrement, roundBits;
				401	flag isTiny;
				402
				403	roundingMode = float_rounding_mode;
				404	roundNearestEven = ( roundingMode == float_round_nearest_even );
				405	roundIncrement = 0x200;
				406	if ( ! roundNearestEven ) {
				407	if ( roundingMode == float_round_to_zero ) {
				408	roundIncrement = 0;
				409	}
				410	else {
				411	roundIncrement = 0x3FF;
				412	if ( zSign ) {
				413	if ( roundingMode == float_round_up ) roundIncrement = 0;
				414	}
				415	else {
				416	if ( roundingMode == float_round_down ) roundIncrement = 0;
				417	}
				418	}
				419	}
				420	roundBits = zSig & 0x3FF;
				421	if ( 0x7FD <= (bits16) zExp ) {
				422	if ( ( 0x7FD < zExp )
				423	\|\| ( ( zExp == 0x7FD )
				424	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				425	) {
				426	//register int lr = __builtin_return_address(0);
				427	//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
				428	float_raise( float_flag_overflow \| float_flag_inexact );
				429	return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
				430	}
				431	if ( zExp < 0 ) {
				432	isTiny =
				433	( float_detect_tininess == float_tininess_before_rounding )
				434	\|\| ( zExp < -1 )
				435	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				436	shift64RightJamming( zSig, - zExp, &zSig );
				437	zExp = 0;
				438	roundBits = zSig & 0x3FF;
				439	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				440	}
				441	}
				442	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				443	zSig = ( zSig + roundIncrement )>>10;
				444	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				445	if ( zSig == 0 ) zExp = 0;
				446	return packFloat64( zSign, zExp, zSig );
				447
				448	}
				449
				450	/*
				451	-------------------------------------------------------------------------------
				452	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				453	and significand `zSig', and returns the proper double-precision floating-
				454	point value corresponding to the abstract input. This routine is just like
				455	`roundAndPackFloat64' except that `zSig' does not have to be normalized in
				456	any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
				457	point exponent.
				458	-------------------------------------------------------------------------------
				459	*/
				460	static float64
				461	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
				462	{
				463	int8 shiftCount;
				464
				465	shiftCount = countLeadingZeros64( zSig ) - 1;
				466	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
				467
				468	}
				469
				470	#ifdef FLOATX80
				471
				472	/*
				473	-------------------------------------------------------------------------------
				474	Returns the fraction bits of the extended double-precision floating-point
				475	value `a'.
				476	-------------------------------------------------------------------------------
				477	*/
				478	INLINE bits64 extractFloatx80Frac( floatx80 a )
				479	{
				480
				481	return a.low;
				482
				483	}
				484
				485	/*
				486	-------------------------------------------------------------------------------
				487	Returns the exponent bits of the extended double-precision floating-point
				488	value `a'.
				489	-------------------------------------------------------------------------------
				490	*/
				491	INLINE int32 extractFloatx80Exp( floatx80 a )
				492	{
				493
				494	return a.high & 0x7FFF;
				495
				496	}
				497
				498	/*
				499	-------------------------------------------------------------------------------
				500	Returns the sign bit of the extended double-precision floating-point value
				501	`a'.
				502	-------------------------------------------------------------------------------
				503	*/
				504	INLINE flag extractFloatx80Sign( floatx80 a )
				505	{
				506
				507	return a.high>>15;
				508
				509	}
				510
				511	/*
				512	-------------------------------------------------------------------------------
				513	Normalizes the subnormal extended double-precision floating-point value
				514	represented by the denormalized significand `aSig'. The normalized exponent
				515	and significand are stored at the locations pointed to by `zExpPtr' and
				516	`zSigPtr', respectively.
				517	-------------------------------------------------------------------------------
				518	*/
				519	static void
				520	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				521	{
				522	int8 shiftCount;
				523
				524	shiftCount = countLeadingZeros64( aSig );
				525	*zSigPtr = aSig<<shiftCount;
				526	*zExpPtr = 1 - shiftCount;
				527
				528	}
				529
				530	/*
				531	-------------------------------------------------------------------------------
				532	Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				533	extended double-precision floating-point value, returning the result.
				534	-------------------------------------------------------------------------------
				535	*/
				536	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				537	{
				538	floatx80 z;
				539
				540	z.low = zSig;
				541	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				542	return z;
				543
				544	}
				545
				546	/*
				547	-------------------------------------------------------------------------------
				548	Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				549	and extended significand formed by the concatenation of `zSig0' and `zSig1',
				550	and returns the proper extended double-precision floating-point value
				551	corresponding to the abstract input. Ordinarily, the abstract value is
				552	rounded and packed into the extended double-precision format, with the
				553	inexact exception raised if the abstract input cannot be represented
				554	exactly. If the abstract value is too large, however, the overflow and
				555	inexact exceptions are raised and an infinity or maximal finite value is
				556	returned. If the abstract value is too small, the input value is rounded to
				557	a subnormal number, and the underflow and inexact exceptions are raised if
				558	the abstract input cannot be represented exactly as a subnormal extended
				559	double-precision floating-point number.
				560	If `roundingPrecision' is 32 or 64, the result is rounded to the same
				561	number of bits as single or double precision, respectively. Otherwise, the
				562	result is rounded to the full precision of the extended double-precision
				563	format.
				564	The input significand must be normalized or smaller. If the input
				565	significand is not normalized, `zExp' must be 0; in that case, the result
				566	returned is a subnormal number, and it must not require rounding. The
				567	handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				568	Floating-point Arithmetic.
				569	-------------------------------------------------------------------------------
				570	*/
				571	static floatx80
				572	roundAndPackFloatx80(
				573	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				574	)
				575	{
				576	int8 roundingMode;
				577	flag roundNearestEven, increment, isTiny;
				578	int64 roundIncrement, roundMask, roundBits;
				579
				580	roundingMode = float_rounding_mode;
				581	roundNearestEven = ( roundingMode == float_round_nearest_even );
				582	if ( roundingPrecision == 80 ) goto precision80;
				583	if ( roundingPrecision == 64 ) {
				584	roundIncrement = LIT64( 0x0000000000000400 );
				585	roundMask = LIT64( 0x00000000000007FF );
				586	}
				587	else if ( roundingPrecision == 32 ) {
				588	roundIncrement = LIT64( 0x0000008000000000 );
				589	roundMask = LIT64( 0x000000FFFFFFFFFF );
				590	}
				591	else {
				592	goto precision80;
				593	}
				594	zSig0 \|= ( zSig1 != 0 );
				595	if ( ! roundNearestEven ) {
				596	if ( roundingMode == float_round_to_zero ) {
				597	roundIncrement = 0;
				598	}
				599	else {
				600	roundIncrement = roundMask;
				601	if ( zSign ) {
				602	if ( roundingMode == float_round_up ) roundIncrement = 0;
				603	}
				604	else {
				605	if ( roundingMode == float_round_down ) roundIncrement = 0;
				606	}
				607	}
				608	}
				609	roundBits = zSig0 & roundMask;
				610	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				611	if ( ( 0x7FFE < zExp )
				612	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				613	) {
				614	goto overflow;
				615	}
				616	if ( zExp <= 0 ) {
				617	isTiny =
				618	( float_detect_tininess == float_tininess_before_rounding )
				619	\|\| ( zExp < 0 )
				620	\|\| ( zSig0 <= zSig0 + roundIncrement );
				621	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				622	zExp = 0;
				623	roundBits = zSig0 & roundMask;
				624	if ( isTiny && roundBits ) float_raise( float_flag_underflow );
				625	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				626	zSig0 += roundIncrement;
				627	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				628	roundIncrement = roundMask + 1;
				629	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				630	roundMask \|= roundIncrement;
				631	}
				632	zSig0 &= ~ roundMask;
				633	return packFloatx80( zSign, zExp, zSig0 );
				634	}
				635	}
				636	if ( roundBits ) float_exception_flags \|= float_flag_inexact;
				637	zSig0 += roundIncrement;
				638	if ( zSig0 < roundIncrement ) {
				639	++zExp;
				640	zSig0 = LIT64( 0x8000000000000000 );
				641	}
				642	roundIncrement = roundMask + 1;
				643	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				644	roundMask \|= roundIncrement;
				645	}
				646	zSig0 &= ~ roundMask;
				647	if ( zSig0 == 0 ) zExp = 0;
				648	return packFloatx80( zSign, zExp, zSig0 );
				649	precision80:
				650	increment = ( (sbits64) zSig1 < 0 );
				651	if ( ! roundNearestEven ) {
				652	if ( roundingMode == float_round_to_zero ) {
				653	increment = 0;
				654	}
				655	else {
				656	if ( zSign ) {
				657	increment = ( roundingMode == float_round_down ) && zSig1;
				658	}
				659	else {
				660	increment = ( roundingMode == float_round_up ) && zSig1;
				661	}
				662	}
				663	}
				664	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				665	if ( ( 0x7FFE < zExp )
				666	\|\| ( ( zExp == 0x7FFE )
				667	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				668	&& increment
				669	)
				670	) {
				671	roundMask = 0;
				672	overflow:
				673	float_raise( float_flag_overflow \| float_flag_inexact );
				674	if ( ( roundingMode == float_round_to_zero )
				675	\|\| ( zSign && ( roundingMode == float_round_up ) )
				676	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				677	) {
				678	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				679	}
				680	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				681	}
				682	if ( zExp <= 0 ) {
				683	isTiny =
				684	( float_detect_tininess == float_tininess_before_rounding )
				685	\|\| ( zExp < 0 )
				686	\|\| ! increment
				687	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				688	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				689	zExp = 0;
				690	if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
				691	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				692	if ( roundNearestEven ) {
				693	increment = ( (sbits64) zSig1 < 0 );
				694	}
				695	else {
				696	if ( zSign ) {
				697	increment = ( roundingMode == float_round_down ) && zSig1;
				698	}
				699	else {
				700	increment = ( roundingMode == float_round_up ) && zSig1;
				701	}
				702	}
				703	if ( increment ) {
				704	++zSig0;
				705	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				706	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				707	}
				708	return packFloatx80( zSign, zExp, zSig0 );
				709	}
				710	}
				711	if ( zSig1 ) float_exception_flags \|= float_flag_inexact;
				712	if ( increment ) {
				713	++zSig0;
				714	if ( zSig0 == 0 ) {
				715	++zExp;
				716	zSig0 = LIT64( 0x8000000000000000 );
				717	}
				718	else {
				719	zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
				720	}
				721	}
				722	else {
				723	if ( zSig0 == 0 ) zExp = 0;
				724	}
				725
				726	return packFloatx80( zSign, zExp, zSig0 );
				727	}
				728
				729	/*
				730	-------------------------------------------------------------------------------
				731	Takes an abstract floating-point value having sign `zSign', exponent
				732	`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				733	and returns the proper extended double-precision floating-point value
				734	corresponding to the abstract input. This routine is just like
				735	`roundAndPackFloatx80' except that the input significand does not have to be
				736	normalized.
				737	-------------------------------------------------------------------------------
				738	*/
				739	static floatx80
				740	normalizeRoundAndPackFloatx80(
				741	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				742	)
				743	{
				744	int8 shiftCount;
				745
				746	if ( zSig0 == 0 ) {
				747	zSig0 = zSig1;
				748	zSig1 = 0;
				749	zExp -= 64;
				750	}
				751	shiftCount = countLeadingZeros64( zSig0 );
				752	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				753	zExp -= shiftCount;
				754	return
				755	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
				756
				757	}
				758
				759	#endif
				760
				761	/*
				762	-------------------------------------------------------------------------------
				763	Returns the result of converting the 32-bit two's complement integer `a' to
				764	the single-precision floating-point format. The conversion is performed
				765	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				766	-------------------------------------------------------------------------------
				767	*/
				768	float32 int32_to_float32( int32 a )
				769	{
				770	flag zSign;
				771
				772	if ( a == 0 ) return 0;
				773	if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				774	zSign = ( a < 0 );
				775	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
				776
				777	}
				778
				779	/*
				780	-------------------------------------------------------------------------------
				781	Returns the result of converting the 32-bit two's complement integer `a' to
				782	the double-precision floating-point format. The conversion is performed
				783	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				784	-------------------------------------------------------------------------------
				785	*/
				786	float64 int32_to_float64( int32 a )
				787	{
				788	flag aSign;
				789	uint32 absA;
				790	int8 shiftCount;
				791	bits64 zSig;
				792
				793	if ( a == 0 ) return 0;
				794	aSign = ( a < 0 );
				795	absA = aSign ? - a : a;
				796	shiftCount = countLeadingZeros32( absA ) + 21;
				797	zSig = absA;
				798	return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
				799
				800	}
				801
				802	#ifdef FLOATX80
				803
				804	/*
				805	-------------------------------------------------------------------------------
				806	Returns the result of converting the 32-bit two's complement integer `a'
				807	to the extended double-precision floating-point format. The conversion
				808	is performed according to the IEC/IEEE Standard for Binary Floating-point
				809	Arithmetic.
				810	-------------------------------------------------------------------------------
				811	*/
				812	floatx80 int32_to_floatx80( int32 a )
				813	{
				814	flag zSign;
				815	uint32 absA;
				816	int8 shiftCount;
				817	bits64 zSig;
				818
				819	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				820	zSign = ( a < 0 );
				821	absA = zSign ? - a : a;
				822	shiftCount = countLeadingZeros32( absA ) + 32;
				823	zSig = absA;
				824	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				825
				826	}
				827
				828	#endif
				829
				830	/*
				831	-------------------------------------------------------------------------------
				832	Returns the result of converting the single-precision floating-point value
				833	`a' to the 32-bit two's complement integer format. The conversion is
				834	performed according to the IEC/IEEE Standard for Binary Floating-point
				835	Arithmetic---which means in particular that the conversion is rounded
				836	according to the current rounding mode. If `a' is a NaN, the largest
				837	positive integer is returned. Otherwise, if the conversion overflows, the
				838	largest integer with the same sign as `a' is returned.
				839	-------------------------------------------------------------------------------
				840	*/
				841	int32 float32_to_int32( float32 a )
				842	{
				843	flag aSign;
				844	int16 aExp, shiftCount;
				845	bits32 aSig;
				846	bits64 zSig;
				847
				848	aSig = extractFloat32Frac( a );
				849	aExp = extractFloat32Exp( a );
				850	aSign = extractFloat32Sign( a );
				851	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				852	if ( aExp ) aSig \|= 0x00800000;
				853	shiftCount = 0xAF - aExp;
				854	zSig = aSig;
				855	zSig <<= 32;
				856	if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
				857	return roundAndPackInt32( aSign, zSig );
				858
				859	}
				860
				861	/*
				862	-------------------------------------------------------------------------------
				863	Returns the result of converting the single-precision floating-point value
				864	`a' to the 32-bit two's complement integer format. The conversion is
				865	performed according to the IEC/IEEE Standard for Binary Floating-point
				866	Arithmetic, except that the conversion is always rounded toward zero. If
				867	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				868	conversion overflows, the largest integer with the same sign as `a' is
				869	returned.
				870	-------------------------------------------------------------------------------
				871	*/
				872	int32 float32_to_int32_round_to_zero( float32 a )
				873	{
				874	flag aSign;
				875	int16 aExp, shiftCount;
				876	bits32 aSig;
				877	int32 z;
				878
				879	aSig = extractFloat32Frac( a );
				880	aExp = extractFloat32Exp( a );
				881	aSign = extractFloat32Sign( a );
				882	shiftCount = aExp - 0x9E;
				883	if ( 0 <= shiftCount ) {
				884	if ( a == 0xCF000000 ) return 0x80000000;
				885	float_raise( float_flag_invalid );
				886	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				887	return 0x80000000;
				888	}
				889	else if ( aExp <= 0x7E ) {
				890	if ( aExp \| aSig ) float_exception_flags \|= float_flag_inexact;
				891	return 0;
				892	}
				893	aSig = ( aSig \| 0x00800000 )<<8;
				894	z = aSig>>( - shiftCount );
				895	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				896	float_exception_flags \|= float_flag_inexact;
				897	}
				898	return aSign ? - z : z;
				899
				900	}
				901
				902	/*
				903	-------------------------------------------------------------------------------
				904	Returns the result of converting the single-precision floating-point value
				905	`a' to the double-precision floating-point format. The conversion is
				906	performed according to the IEC/IEEE Standard for Binary Floating-point
				907	Arithmetic.
				908	-------------------------------------------------------------------------------
				909	*/
				910	float64 float32_to_float64( float32 a )
				911	{
				912	flag aSign;
				913	int16 aExp;
				914	bits32 aSig;
				915
				916	aSig = extractFloat32Frac( a );
				917	aExp = extractFloat32Exp( a );
				918	aSign = extractFloat32Sign( a );
				919	if ( aExp == 0xFF ) {
				920	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
				921	return packFloat64( aSign, 0x7FF, 0 );
				922	}
				923	if ( aExp == 0 ) {
				924	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				925	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				926	--aExp;
				927	}
				928	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				929
				930	}
				931
				932	#ifdef FLOATX80
				933
				934	/*
				935	-------------------------------------------------------------------------------
				936	Returns the result of converting the single-precision floating-point value
				937	`a' to the extended double-precision floating-point format. The conversion
				938	is performed according to the IEC/IEEE Standard for Binary Floating-point
				939	Arithmetic.
				940	-------------------------------------------------------------------------------
				941	*/
				942	floatx80 float32_to_floatx80( float32 a )
				943	{
				944	flag aSign;
				945	int16 aExp;
				946	bits32 aSig;
				947
				948	aSig = extractFloat32Frac( a );
				949	aExp = extractFloat32Exp( a );
				950	aSign = extractFloat32Sign( a );
				951	if ( aExp == 0xFF ) {
				952	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
				953	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				954	}
				955	if ( aExp == 0 ) {
				956	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				957	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				958	}
				959	aSig \|= 0x00800000;
				960	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				961
				962	}
				963
				964	#endif
				965
				966	/*
				967	-------------------------------------------------------------------------------
				968	Rounds the single-precision floating-point value `a' to an integer, and
				969	returns the result as a single-precision floating-point value. The
				970	operation is performed according to the IEC/IEEE Standard for Binary
				971	Floating-point Arithmetic.
				972	-------------------------------------------------------------------------------
				973	*/
				974	float32 float32_round_to_int( float32 a )
				975	{
				976	flag aSign;
				977	int16 aExp;
				978	bits32 lastBitMask, roundBitsMask;
				979	int8 roundingMode;
				980	float32 z;
				981
				982	aExp = extractFloat32Exp( a );
				983	if ( 0x96 <= aExp ) {
				984	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				985	return propagateFloat32NaN( a, a );
				986	}
				987	return a;
				988	}
				989	if ( aExp <= 0x7E ) {
				990	if ( (bits32) ( a<<1 ) == 0 ) return a;
				991	float_exception_flags \|= float_flag_inexact;
				992	aSign = extractFloat32Sign( a );
				993	switch ( float_rounding_mode ) {
				994	case float_round_nearest_even:
				995	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				996	return packFloat32( aSign, 0x7F, 0 );
				997	}
				998	break;
				999	case float_round_down:
				1000	return aSign ? 0xBF800000 : 0;
				1001	case float_round_up:
				1002	return aSign ? 0x80000000 : 0x3F800000;
				1003	}
				1004	return packFloat32( aSign, 0, 0 );
				1005	}
				1006	lastBitMask = 1;
				1007	lastBitMask <<= 0x96 - aExp;
				1008	roundBitsMask = lastBitMask - 1;
				1009	z = a;
				1010	roundingMode = float_rounding_mode;
				1011	if ( roundingMode == float_round_nearest_even ) {
				1012	z += lastBitMask>>1;
				1013	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1014	}
				1015	else if ( roundingMode != float_round_to_zero ) {
				1016	if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1017	z += roundBitsMask;
				1018	}
				1019	}
				1020	z &= ~ roundBitsMask;
				1021	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1022	return z;
				1023
				1024	}
				1025
				1026	/*
				1027	-------------------------------------------------------------------------------
				1028	Returns the result of adding the absolute values of the single-precision
				1029	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1030	before being returned. `zSign' is ignored if the result is a NaN. The
				1031	addition is performed according to the IEC/IEEE Standard for Binary
				1032	Floating-point Arithmetic.
				1033	-------------------------------------------------------------------------------
				1034	*/
				1035	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
				1036	{
				1037	int16 aExp, bExp, zExp;
				1038	bits32 aSig, bSig, zSig;
				1039	int16 expDiff;
				1040
				1041	aSig = extractFloat32Frac( a );
				1042	aExp = extractFloat32Exp( a );
				1043	bSig = extractFloat32Frac( b );
				1044	bExp = extractFloat32Exp( b );
				1045	expDiff = aExp - bExp;
				1046	aSig <<= 6;
				1047	bSig <<= 6;
				1048	if ( 0 < expDiff ) {
				1049	if ( aExp == 0xFF ) {
				1050	if ( aSig ) return propagateFloat32NaN( a, b );
				1051	return a;
				1052	}
				1053	if ( bExp == 0 ) {
				1054	--expDiff;
				1055	}
				1056	else {
				1057	bSig \|= 0x20000000;
				1058	}
				1059	shift32RightJamming( bSig, expDiff, &bSig );
				1060	zExp = aExp;
				1061	}
				1062	else if ( expDiff < 0 ) {
				1063	if ( bExp == 0xFF ) {
				1064	if ( bSig ) return propagateFloat32NaN( a, b );
				1065	return packFloat32( zSign, 0xFF, 0 );
				1066	}
				1067	if ( aExp == 0 ) {
				1068	++expDiff;
				1069	}
				1070	else {
				1071	aSig \|= 0x20000000;
				1072	}
				1073	shift32RightJamming( aSig, - expDiff, &aSig );
				1074	zExp = bExp;
				1075	}
				1076	else {
				1077	if ( aExp == 0xFF ) {
				1078	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1079	return a;
				1080	}
				1081	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1082	zSig = 0x40000000 + aSig + bSig;
				1083	zExp = aExp;
				1084	goto roundAndPack;
				1085	}
				1086	aSig \|= 0x20000000;
				1087	zSig = ( aSig + bSig )<<1;
				1088	--zExp;
				1089	if ( (sbits32) zSig < 0 ) {
				1090	zSig = aSig + bSig;
				1091	++zExp;
				1092	}
				1093	roundAndPack:
				1094	return roundAndPackFloat32( zSign, zExp, zSig );
				1095
				1096	}
				1097
				1098	/*
				1099	-------------------------------------------------------------------------------
				1100	Returns the result of subtracting the absolute values of the single-
				1101	precision floating-point values `a' and `b'. If `zSign' is true, the
				1102	difference is negated before being returned. `zSign' is ignored if the
				1103	result is a NaN. The subtraction is performed according to the IEC/IEEE
				1104	Standard for Binary Floating-point Arithmetic.
				1105	-------------------------------------------------------------------------------
				1106	*/
				1107	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
				1108	{
				1109	int16 aExp, bExp, zExp;
				1110	bits32 aSig, bSig, zSig;
				1111	int16 expDiff;
				1112
				1113	aSig = extractFloat32Frac( a );
				1114	aExp = extractFloat32Exp( a );
				1115	bSig = extractFloat32Frac( b );
				1116	bExp = extractFloat32Exp( b );
				1117	expDiff = aExp - bExp;
				1118	aSig <<= 7;
				1119	bSig <<= 7;
				1120	if ( 0 < expDiff ) goto aExpBigger;
				1121	if ( expDiff < 0 ) goto bExpBigger;
				1122	if ( aExp == 0xFF ) {
				1123	if ( aSig \| bSig ) return propagateFloat32NaN( a, b );
				1124	float_raise( float_flag_invalid );
				1125	return float32_default_nan;
				1126	}
				1127	if ( aExp == 0 ) {
				1128	aExp = 1;
				1129	bExp = 1;
				1130	}
				1131	if ( bSig < aSig ) goto aBigger;
				1132	if ( aSig < bSig ) goto bBigger;
				1133	return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
				1134	bExpBigger:
				1135	if ( bExp == 0xFF ) {
				1136	if ( bSig ) return propagateFloat32NaN( a, b );
				1137	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1138	}
				1139	if ( aExp == 0 ) {
				1140	++expDiff;
				1141	}
				1142	else {
				1143	aSig \|= 0x40000000;
				1144	}
				1145	shift32RightJamming( aSig, - expDiff, &aSig );
				1146	bSig \|= 0x40000000;
				1147	bBigger:
				1148	zSig = bSig - aSig;
				1149	zExp = bExp;
				1150	zSign ^= 1;
				1151	goto normalizeRoundAndPack;
				1152	aExpBigger:
				1153	if ( aExp == 0xFF ) {
				1154	if ( aSig ) return propagateFloat32NaN( a, b );
				1155	return a;
				1156	}
				1157	if ( bExp == 0 ) {
				1158	--expDiff;
				1159	}
				1160	else {
				1161	bSig \|= 0x40000000;
				1162	}
				1163	shift32RightJamming( bSig, expDiff, &bSig );
				1164	aSig \|= 0x40000000;
				1165	aBigger:
				1166	zSig = aSig - bSig;
				1167	zExp = aExp;
				1168	normalizeRoundAndPack:
				1169	--zExp;
				1170	return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
				1171
				1172	}
				1173
				1174	/*
				1175	-------------------------------------------------------------------------------
				1176	Returns the result of adding the single-precision floating-point values `a'
				1177	and `b'. The operation is performed according to the IEC/IEEE Standard for
				1178	Binary Floating-point Arithmetic.
				1179	-------------------------------------------------------------------------------
				1180	*/
				1181	float32 float32_add( float32 a, float32 b )
				1182	{
				1183	flag aSign, bSign;
				1184
				1185	aSign = extractFloat32Sign( a );
				1186	bSign = extractFloat32Sign( b );
				1187	if ( aSign == bSign ) {
				1188	return addFloat32Sigs( a, b, aSign );
				1189	}
				1190	else {
				1191	return subFloat32Sigs( a, b, aSign );
				1192	}
				1193
				1194	}
				1195
				1196	/*
				1197	-------------------------------------------------------------------------------
				1198	Returns the result of subtracting the single-precision floating-point values
				1199	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1200	for Binary Floating-point Arithmetic.
				1201	-------------------------------------------------------------------------------
				1202	*/
				1203	float32 float32_sub( float32 a, float32 b )
				1204	{
				1205	flag aSign, bSign;
				1206
				1207	aSign = extractFloat32Sign( a );
				1208	bSign = extractFloat32Sign( b );
				1209	if ( aSign == bSign ) {
				1210	return subFloat32Sigs( a, b, aSign );
				1211	}
				1212	else {
				1213	return addFloat32Sigs( a, b, aSign );
				1214	}
				1215
				1216	}
				1217
				1218	/*
				1219	-------------------------------------------------------------------------------
				1220	Returns the result of multiplying the single-precision floating-point values
				1221	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1222	for Binary Floating-point Arithmetic.
				1223	-------------------------------------------------------------------------------
				1224	*/
				1225	float32 float32_mul( float32 a, float32 b )
				1226	{
				1227	flag aSign, bSign, zSign;
				1228	int16 aExp, bExp, zExp;
				1229	bits32 aSig, bSig;
				1230	bits64 zSig64;
				1231	bits32 zSig;
				1232
				1233	aSig = extractFloat32Frac( a );
				1234	aExp = extractFloat32Exp( a );
				1235	aSign = extractFloat32Sign( a );
				1236	bSig = extractFloat32Frac( b );
				1237	bExp = extractFloat32Exp( b );
				1238	bSign = extractFloat32Sign( b );
				1239	zSign = aSign ^ bSign;
				1240	if ( aExp == 0xFF ) {
				1241	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1242	return propagateFloat32NaN( a, b );
				1243	}
				1244	if ( ( bExp \| bSig ) == 0 ) {
				1245	float_raise( float_flag_invalid );
				1246	return float32_default_nan;
				1247	}
				1248	return packFloat32( zSign, 0xFF, 0 );
				1249	}
				1250	if ( bExp == 0xFF ) {
				1251	if ( bSig ) return propagateFloat32NaN( a, b );
				1252	if ( ( aExp \| aSig ) == 0 ) {
				1253	float_raise( float_flag_invalid );
				1254	return float32_default_nan;
				1255	}
				1256	return packFloat32( zSign, 0xFF, 0 );
				1257	}
				1258	if ( aExp == 0 ) {
				1259	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1260	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1261	}
				1262	if ( bExp == 0 ) {
				1263	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1264	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1265	}
				1266	zExp = aExp + bExp - 0x7F;
				1267	aSig = ( aSig \| 0x00800000 )<<7;
				1268	bSig = ( bSig \| 0x00800000 )<<8;
				1269	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1270	zSig = zSig64;
				1271	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1272	zSig <<= 1;
				1273	--zExp;
				1274	}
				1275	return roundAndPackFloat32( zSign, zExp, zSig );
				1276
				1277	}
				1278
				1279	/*
				1280	-------------------------------------------------------------------------------
				1281	Returns the result of dividing the single-precision floating-point value `a'
				1282	by the corresponding value `b'. The operation is performed according to the
				1283	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1284	-------------------------------------------------------------------------------
				1285	*/
				1286	float32 float32_div( float32 a, float32 b )
				1287	{
				1288	flag aSign, bSign, zSign;
				1289	int16 aExp, bExp, zExp;
				1290	bits32 aSig, bSig, zSig;
				1291
				1292	aSig = extractFloat32Frac( a );
				1293	aExp = extractFloat32Exp( a );
				1294	aSign = extractFloat32Sign( a );
				1295	bSig = extractFloat32Frac( b );
				1296	bExp = extractFloat32Exp( b );
				1297	bSign = extractFloat32Sign( b );
				1298	zSign = aSign ^ bSign;
				1299	if ( aExp == 0xFF ) {
				1300	if ( aSig ) return propagateFloat32NaN( a, b );
				1301	if ( bExp == 0xFF ) {
				1302	if ( bSig ) return propagateFloat32NaN( a, b );
				1303	float_raise( float_flag_invalid );
				1304	return float32_default_nan;
				1305	}
				1306	return packFloat32( zSign, 0xFF, 0 );
				1307	}
				1308	if ( bExp == 0xFF ) {
				1309	if ( bSig ) return propagateFloat32NaN( a, b );
				1310	return packFloat32( zSign, 0, 0 );
				1311	}
				1312	if ( bExp == 0 ) {
				1313	if ( bSig == 0 ) {
				1314	if ( ( aExp \| aSig ) == 0 ) {
				1315	float_raise( float_flag_invalid );
				1316	return float32_default_nan;
				1317	}
				1318	float_raise( float_flag_divbyzero );
				1319	return packFloat32( zSign, 0xFF, 0 );
				1320	}
				1321	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1322	}
				1323	if ( aExp == 0 ) {
				1324	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1325	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1326	}
				1327	zExp = aExp - bExp + 0x7D;
				1328	aSig = ( aSig \| 0x00800000 )<<7;
				1329	bSig = ( bSig \| 0x00800000 )<<8;
				1330	if ( bSig <= ( aSig + aSig ) ) {
				1331	aSig >>= 1;
				1332	++zExp;
				1333	}
				1334	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
				1335	if ( ( zSig & 0x3F ) == 0 ) {
				1336	zSig \|= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
				1337	}
				1338	return roundAndPackFloat32( zSign, zExp, zSig );
				1339
				1340	}
				1341
				1342	/*
				1343	-------------------------------------------------------------------------------
				1344	Returns the remainder of the single-precision floating-point value `a'
				1345	with respect to the corresponding value `b'. The operation is performed
				1346	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1347	-------------------------------------------------------------------------------
				1348	*/
				1349	float32 float32_rem( float32 a, float32 b )
				1350	{
				1351	flag aSign, bSign, zSign;
				1352	int16 aExp, bExp, expDiff;
				1353	bits32 aSig, bSig;
				1354	bits32 q;
				1355	bits64 aSig64, bSig64, q64;
				1356	bits32 alternateASig;
				1357	sbits32 sigMean;
				1358
				1359	aSig = extractFloat32Frac( a );
				1360	aExp = extractFloat32Exp( a );
				1361	aSign = extractFloat32Sign( a );
				1362	bSig = extractFloat32Frac( b );
				1363	bExp = extractFloat32Exp( b );
				1364	bSign = extractFloat32Sign( b );
				1365	if ( aExp == 0xFF ) {
				1366	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1367	return propagateFloat32NaN( a, b );
				1368	}
				1369	float_raise( float_flag_invalid );
				1370	return float32_default_nan;
				1371	}
				1372	if ( bExp == 0xFF ) {
				1373	if ( bSig ) return propagateFloat32NaN( a, b );
				1374	return a;
				1375	}
				1376	if ( bExp == 0 ) {
				1377	if ( bSig == 0 ) {
				1378	float_raise( float_flag_invalid );
				1379	return float32_default_nan;
				1380	}
				1381	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1382	}
				1383	if ( aExp == 0 ) {
				1384	if ( aSig == 0 ) return a;
				1385	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1386	}
				1387	expDiff = aExp - bExp;
				1388	aSig \|= 0x00800000;
				1389	bSig \|= 0x00800000;
				1390	if ( expDiff < 32 ) {
				1391	aSig <<= 8;
				1392	bSig <<= 8;
				1393	if ( expDiff < 0 ) {
				1394	if ( expDiff < -1 ) return a;
				1395	aSig >>= 1;
				1396	}
				1397	q = ( bSig <= aSig );
				1398	if ( q ) aSig -= bSig;
				1399	if ( 0 < expDiff ) {
				1400	q = ( ( (bits64) aSig )<<32 ) / bSig;
				1401	q >>= 32 - expDiff;
				1402	bSig >>= 2;
				1403	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1404	}
				1405	else {
				1406	aSig >>= 2;
				1407	bSig >>= 2;
				1408	}
				1409	}
				1410	else {
				1411	if ( bSig <= aSig ) aSig -= bSig;
				1412	aSig64 = ( (bits64) aSig )<<40;
				1413	bSig64 = ( (bits64) bSig )<<40;
				1414	expDiff -= 64;
				1415	while ( 0 < expDiff ) {
				1416	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1417	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1418	aSig64 = - ( ( bSig * q64 )<<38 );
				1419	expDiff -= 62;
				1420	}
				1421	expDiff += 64;
				1422	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1423	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1424	q = q64>>( 64 - expDiff );
				1425	bSig <<= 6;
				1426	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1427	}
				1428	do {
				1429	alternateASig = aSig;
				1430	++q;
				1431	aSig -= bSig;
				1432	} while ( 0 <= (sbits32) aSig );
				1433	sigMean = aSig + alternateASig;
				1434	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1435	aSig = alternateASig;
				1436	}
				1437	zSign = ( (sbits32) aSig < 0 );
				1438	if ( zSign ) aSig = - aSig;
				1439	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
				1440
				1441	}
				1442
				1443	/*
				1444	-------------------------------------------------------------------------------
				1445	Returns the square root of the single-precision floating-point value `a'.
				1446	The operation is performed according to the IEC/IEEE Standard for Binary
				1447	Floating-point Arithmetic.
				1448	-------------------------------------------------------------------------------
				1449	*/
				1450	float32 float32_sqrt( float32 a )
				1451	{
				1452	flag aSign;
				1453	int16 aExp, zExp;
				1454	bits32 aSig, zSig;
				1455	bits64 rem, term;
				1456
				1457	aSig = extractFloat32Frac( a );
				1458	aExp = extractFloat32Exp( a );
				1459	aSign = extractFloat32Sign( a );
				1460	if ( aExp == 0xFF ) {
				1461	if ( aSig ) return propagateFloat32NaN( a, 0 );
				1462	if ( ! aSign ) return a;
				1463	float_raise( float_flag_invalid );
				1464	return float32_default_nan;
				1465	}
				1466	if ( aSign ) {
				1467	if ( ( aExp \| aSig ) == 0 ) return a;
				1468	float_raise( float_flag_invalid );
				1469	return float32_default_nan;
				1470	}
				1471	if ( aExp == 0 ) {
				1472	if ( aSig == 0 ) return 0;
				1473	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1474	}
				1475	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				1476	aSig = ( aSig \| 0x00800000 )<<8;
				1477	zSig = estimateSqrt32( aExp, aSig ) + 2;
				1478	if ( ( zSig & 0x7F ) <= 5 ) {
				1479	if ( zSig < 2 ) {
				1480	zSig = 0xFFFFFFFF;
				1481	}
				1482	else {
				1483	aSig >>= aExp & 1;
				1484	term = ( (bits64) zSig ) * zSig;
				1485	rem = ( ( (bits64) aSig )<<32 ) - term;
				1486	while ( (sbits64) rem < 0 ) {
				1487	--zSig;
				1488	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				1489	}
				1490	zSig \|= ( rem != 0 );
				1491	}
				1492	}
				1493	shift32RightJamming( zSig, 1, &zSig );
				1494	return roundAndPackFloat32( 0, zExp, zSig );
				1495
				1496	}
				1497
				1498	/*
				1499	-------------------------------------------------------------------------------
				1500	Returns 1 if the single-precision floating-point value `a' is equal to the
				1501	corresponding value `b', and 0 otherwise. The comparison is performed
				1502	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1503	-------------------------------------------------------------------------------
				1504	*/
				1505	flag float32_eq( float32 a, float32 b )
				1506	{
				1507
				1508	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1509	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1510	) {
				1511	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1512	float_raise( float_flag_invalid );
				1513	}
				1514	return 0;
				1515	}
				1516	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1517
				1518	}
				1519
				1520	/*
				1521	-------------------------------------------------------------------------------
				1522	Returns 1 if the single-precision floating-point value `a' is less than or
				1523	equal to the corresponding value `b', and 0 otherwise. The comparison is
				1524	performed according to the IEC/IEEE Standard for Binary Floating-point
				1525	Arithmetic.
				1526	-------------------------------------------------------------------------------
				1527	*/
				1528	flag float32_le( float32 a, float32 b )
				1529	{
				1530	flag aSign, bSign;
				1531
				1532	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1533	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1534	) {
				1535	float_raise( float_flag_invalid );
				1536	return 0;
				1537	}
				1538	aSign = extractFloat32Sign( a );
				1539	bSign = extractFloat32Sign( b );
				1540	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1541	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1542
				1543	}
				1544
				1545	/*
				1546	-------------------------------------------------------------------------------
				1547	Returns 1 if the single-precision floating-point value `a' is less than
				1548	the corresponding value `b', and 0 otherwise. The comparison is performed
				1549	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1550	-------------------------------------------------------------------------------
				1551	*/
				1552	flag float32_lt( float32 a, float32 b )
				1553	{
				1554	flag aSign, bSign;
				1555
				1556	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1557	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1558	) {
				1559	float_raise( float_flag_invalid );
				1560	return 0;
				1561	}
				1562	aSign = extractFloat32Sign( a );
				1563	bSign = extractFloat32Sign( b );
				1564	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1565	return ( a != b ) && ( aSign ^ ( a < b ) );
				1566
				1567	}
				1568
				1569	/*
				1570	-------------------------------------------------------------------------------
				1571	Returns 1 if the single-precision floating-point value `a' is equal to the
				1572	corresponding value `b', and 0 otherwise. The invalid exception is raised
				1573	if either operand is a NaN. Otherwise, the comparison is performed
				1574	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1575	-------------------------------------------------------------------------------
				1576	*/
				1577	flag float32_eq_signaling( float32 a, float32 b )
				1578	{
				1579
				1580	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1581	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1582	) {
				1583	float_raise( float_flag_invalid );
				1584	return 0;
				1585	}
				1586	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1587
				1588	}
				1589
				1590	/*
				1591	-------------------------------------------------------------------------------
				1592	Returns 1 if the single-precision floating-point value `a' is less than or
				1593	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				1594	cause an exception. Otherwise, the comparison is performed according to the
				1595	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				1596	-------------------------------------------------------------------------------
				1597	*/
				1598	flag float32_le_quiet( float32 a, float32 b )
				1599	{
				1600	flag aSign, bSign;
				1601	//int16 aExp, bExp;
				1602
				1603	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1604	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1605	) {
				1606	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1607	float_raise( float_flag_invalid );
				1608	}
				1609	return 0;
				1610	}
				1611	aSign = extractFloat32Sign( a );
				1612	bSign = extractFloat32Sign( b );
				1613	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				1614	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				1615
				1616	}
				1617
				1618	/*
				1619	-------------------------------------------------------------------------------
				1620	Returns 1 if the single-precision floating-point value `a' is less than
				1621	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				1622	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				1623	Standard for Binary Floating-point Arithmetic.
				1624	-------------------------------------------------------------------------------
				1625	*/
				1626	flag float32_lt_quiet( float32 a, float32 b )
				1627	{
				1628	flag aSign, bSign;
				1629
				1630	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				1631	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				1632	) {
				1633	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				1634	float_raise( float_flag_invalid );
				1635	}
				1636	return 0;
				1637	}
				1638	aSign = extractFloat32Sign( a );
				1639	bSign = extractFloat32Sign( b );
				1640	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				1641	return ( a != b ) && ( aSign ^ ( a < b ) );
				1642
				1643	}
				1644
				1645	/*
				1646	-------------------------------------------------------------------------------
				1647	Returns the result of converting the double-precision floating-point value
				1648	`a' to the 32-bit two's complement integer format. The conversion is
				1649	performed according to the IEC/IEEE Standard for Binary Floating-point
				1650	Arithmetic---which means in particular that the conversion is rounded
				1651	according to the current rounding mode. If `a' is a NaN, the largest
				1652	positive integer is returned. Otherwise, if the conversion overflows, the
				1653	largest integer with the same sign as `a' is returned.
				1654	-------------------------------------------------------------------------------
				1655	*/
				1656	int32 float64_to_int32( float64 a )
				1657	{
				1658	flag aSign;
				1659	int16 aExp, shiftCount;
				1660	bits64 aSig;
				1661
				1662	aSig = extractFloat64Frac( a );
				1663	aExp = extractFloat64Exp( a );
				1664	aSign = extractFloat64Sign( a );
				1665	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1666	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1667	shiftCount = 0x42C - aExp;
				1668	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1669	return roundAndPackInt32( aSign, aSig );
				1670
				1671	}
				1672
				1673	/*
				1674	-------------------------------------------------------------------------------
				1675	Returns the result of converting the double-precision floating-point value
				1676	`a' to the 32-bit two's complement integer format. The conversion is
				1677	performed according to the IEC/IEEE Standard for Binary Floating-point
				1678	Arithmetic, except that the conversion is always rounded toward zero. If
				1679	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1680	conversion overflows, the largest integer with the same sign as `a' is
				1681	returned.
				1682	-------------------------------------------------------------------------------
				1683	*/
				1684	int32 float64_to_int32_round_to_zero( float64 a )
				1685	{
				1686	flag aSign;
				1687	int16 aExp, shiftCount;
				1688	bits64 aSig, savedASig;
				1689	int32 z;
				1690
				1691	aSig = extractFloat64Frac( a );
				1692	aExp = extractFloat64Exp( a );
				1693	aSign = extractFloat64Sign( a );
				1694	shiftCount = 0x433 - aExp;
				1695	if ( shiftCount < 21 ) {
				1696	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1697	goto invalid;
				1698	}
				1699	else if ( 52 < shiftCount ) {
				1700	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1701	return 0;
				1702	}
				1703	aSig \|= LIT64( 0x0010000000000000 );
				1704	savedASig = aSig;
				1705	aSig >>= shiftCount;
				1706	z = aSig;
				1707	if ( aSign ) z = - z;
				1708	if ( ( z < 0 ) ^ aSign ) {
				1709	invalid:
				1710	float_exception_flags \|= float_flag_invalid;
				1711	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1712	}
				1713	if ( ( aSig<<shiftCount ) != savedASig ) {
				1714	float_exception_flags \|= float_flag_inexact;
				1715	}
				1716	return z;
				1717
				1718	}
				1719
				1720	/*
				1721	-------------------------------------------------------------------------------
				1722	Returns the result of converting the double-precision floating-point value
				1723	`a' to the 32-bit two's complement unsigned integer format. The conversion
				1724	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1725	Arithmetic---which means in particular that the conversion is rounded
				1726	according to the current rounding mode. If `a' is a NaN, the largest
				1727	positive integer is returned. Otherwise, if the conversion overflows, the
				1728	largest positive integer is returned.
				1729	-------------------------------------------------------------------------------
				1730	*/
				1731	int32 float64_to_uint32( float64 a )
				1732	{
				1733	flag aSign;
				1734	int16 aExp, shiftCount;
				1735	bits64 aSig;
				1736
				1737	aSig = extractFloat64Frac( a );
				1738	aExp = extractFloat64Exp( a );
				1739	aSign = 0; //extractFloat64Sign( a );
				1740	//if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1741	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				1742	shiftCount = 0x42C - aExp;
				1743	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				1744	return roundAndPackInt32( aSign, aSig );
				1745	}
				1746
				1747	/*
				1748	-------------------------------------------------------------------------------
				1749	Returns the result of converting the double-precision floating-point value
				1750	`a' to the 32-bit two's complement integer format. The conversion is
				1751	performed according to the IEC/IEEE Standard for Binary Floating-point
				1752	Arithmetic, except that the conversion is always rounded toward zero. If
				1753	`a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1754	conversion overflows, the largest positive integer is returned.
				1755	-------------------------------------------------------------------------------
				1756	*/
				1757	int32 float64_to_uint32_round_to_zero( float64 a )
				1758	{
				1759	flag aSign;
				1760	int16 aExp, shiftCount;
				1761	bits64 aSig, savedASig;
				1762	int32 z;
				1763
				1764	aSig = extractFloat64Frac( a );
				1765	aExp = extractFloat64Exp( a );
				1766	aSign = extractFloat64Sign( a );
				1767	shiftCount = 0x433 - aExp;
				1768	if ( shiftCount < 21 ) {
				1769	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				1770	goto invalid;
				1771	}
				1772	else if ( 52 < shiftCount ) {
				1773	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				1774	return 0;
				1775	}
				1776	aSig \|= LIT64( 0x0010000000000000 );
				1777	savedASig = aSig;
				1778	aSig >>= shiftCount;
				1779	z = aSig;
				1780	if ( aSign ) z = - z;
				1781	if ( ( z < 0 ) ^ aSign ) {
				1782	invalid:
				1783	float_exception_flags \|= float_flag_invalid;
				1784	return aSign ? 0x80000000 : 0x7FFFFFFF;
				1785	}
				1786	if ( ( aSig<<shiftCount ) != savedASig ) {
				1787	float_exception_flags \|= float_flag_inexact;
				1788	}
				1789	return z;
				1790	}
				1791
				1792	/*
				1793	-------------------------------------------------------------------------------
				1794	Returns the result of converting the double-precision floating-point value
				1795	`a' to the single-precision floating-point format. The conversion is
				1796	performed according to the IEC/IEEE Standard for Binary Floating-point
				1797	Arithmetic.
				1798	-------------------------------------------------------------------------------
				1799	*/
				1800	float32 float64_to_float32( float64 a )
				1801	{
				1802	flag aSign;
				1803	int16 aExp;
				1804	bits64 aSig;
				1805	bits32 zSig;
				1806
				1807	aSig = extractFloat64Frac( a );
				1808	aExp = extractFloat64Exp( a );
				1809	aSign = extractFloat64Sign( a );
				1810	if ( aExp == 0x7FF ) {
				1811	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
				1812	return packFloat32( aSign, 0xFF, 0 );
				1813	}
				1814	shift64RightJamming( aSig, 22, &aSig );
				1815	zSig = aSig;
				1816	if ( aExp \|\| zSig ) {
				1817	zSig \|= 0x40000000;
				1818	aExp -= 0x381;
				1819	}
				1820	return roundAndPackFloat32( aSign, aExp, zSig );
				1821
				1822	}
				1823
				1824	#ifdef FLOATX80
				1825
				1826	/*
				1827	-------------------------------------------------------------------------------
				1828	Returns the result of converting the double-precision floating-point value
				1829	`a' to the extended double-precision floating-point format. The conversion
				1830	is performed according to the IEC/IEEE Standard for Binary Floating-point
				1831	Arithmetic.
				1832	-------------------------------------------------------------------------------
				1833	*/
				1834	floatx80 float64_to_floatx80( float64 a )
				1835	{
				1836	flag aSign;
				1837	int16 aExp;
				1838	bits64 aSig;
				1839
				1840	aSig = extractFloat64Frac( a );
				1841	aExp = extractFloat64Exp( a );
				1842	aSign = extractFloat64Sign( a );
				1843	if ( aExp == 0x7FF ) {
				1844	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
				1845	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1846	}
				1847	if ( aExp == 0 ) {
				1848	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1849	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				1850	}
				1851	return
				1852	packFloatx80(
				1853	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				1854
				1855	}
				1856
				1857	#endif
				1858
				1859	/*
				1860	-------------------------------------------------------------------------------
				1861	Rounds the double-precision floating-point value `a' to an integer, and
				1862	returns the result as a double-precision floating-point value. The
				1863	operation is performed according to the IEC/IEEE Standard for Binary
				1864	Floating-point Arithmetic.
				1865	-------------------------------------------------------------------------------
				1866	*/
				1867	float64 float64_round_to_int( float64 a )
				1868	{
				1869	flag aSign;
				1870	int16 aExp;
				1871	bits64 lastBitMask, roundBitsMask;
				1872	int8 roundingMode;
				1873	float64 z;
				1874
				1875	aExp = extractFloat64Exp( a );
				1876	if ( 0x433 <= aExp ) {
				1877	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				1878	return propagateFloat64NaN( a, a );
				1879	}
				1880	return a;
				1881	}
				1882	if ( aExp <= 0x3FE ) {
				1883	if ( (bits64) ( a<<1 ) == 0 ) return a;
				1884	float_exception_flags \|= float_flag_inexact;
				1885	aSign = extractFloat64Sign( a );
				1886	switch ( float_rounding_mode ) {
				1887	case float_round_nearest_even:
				1888	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				1889	return packFloat64( aSign, 0x3FF, 0 );
				1890	}
				1891	break;
				1892	case float_round_down:
				1893	return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
				1894	case float_round_up:
				1895	return
				1896	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
				1897	}
				1898	return packFloat64( aSign, 0, 0 );
				1899	}
				1900	lastBitMask = 1;
				1901	lastBitMask <<= 0x433 - aExp;
				1902	roundBitsMask = lastBitMask - 1;
				1903	z = a;
				1904	roundingMode = float_rounding_mode;
				1905	if ( roundingMode == float_round_nearest_even ) {
				1906	z += lastBitMask>>1;
				1907	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1908	}
				1909	else if ( roundingMode != float_round_to_zero ) {
				1910	if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1911	z += roundBitsMask;
				1912	}
				1913	}
				1914	z &= ~ roundBitsMask;
				1915	if ( z != a ) float_exception_flags \|= float_flag_inexact;
				1916	return z;
				1917
				1918	}
				1919
				1920	/*
				1921	-------------------------------------------------------------------------------
				1922	Returns the result of adding the absolute values of the double-precision
				1923	floating-point values `a' and `b'. If `zSign' is true, the sum is negated
				1924	before being returned. `zSign' is ignored if the result is a NaN. The
				1925	addition is performed according to the IEC/IEEE Standard for Binary
				1926	Floating-point Arithmetic.
				1927	-------------------------------------------------------------------------------
				1928	*/
				1929	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
				1930	{
				1931	int16 aExp, bExp, zExp;
				1932	bits64 aSig, bSig, zSig;
				1933	int16 expDiff;
				1934
				1935	aSig = extractFloat64Frac( a );
				1936	aExp = extractFloat64Exp( a );
				1937	bSig = extractFloat64Frac( b );
				1938	bExp = extractFloat64Exp( b );
				1939	expDiff = aExp - bExp;
				1940	aSig <<= 9;
				1941	bSig <<= 9;
				1942	if ( 0 < expDiff ) {
				1943	if ( aExp == 0x7FF ) {
				1944	if ( aSig ) return propagateFloat64NaN( a, b );
				1945	return a;
				1946	}
				1947	if ( bExp == 0 ) {
				1948	--expDiff;
				1949	}
				1950	else {
				1951	bSig \|= LIT64( 0x2000000000000000 );
				1952	}
				1953	shift64RightJamming( bSig, expDiff, &bSig );
				1954	zExp = aExp;
				1955	}
				1956	else if ( expDiff < 0 ) {
				1957	if ( bExp == 0x7FF ) {
				1958	if ( bSig ) return propagateFloat64NaN( a, b );
				1959	return packFloat64( zSign, 0x7FF, 0 );
				1960	}
				1961	if ( aExp == 0 ) {
				1962	++expDiff;
				1963	}
				1964	else {
				1965	aSig \|= LIT64( 0x2000000000000000 );
				1966	}
				1967	shift64RightJamming( aSig, - expDiff, &aSig );
				1968	zExp = bExp;
				1969	}
				1970	else {
				1971	if ( aExp == 0x7FF ) {
				1972	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				1973	return a;
				1974	}
				1975	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				1976	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				1977	zExp = aExp;
				1978	goto roundAndPack;
				1979	}
				1980	aSig \|= LIT64( 0x2000000000000000 );
				1981	zSig = ( aSig + bSig )<<1;
				1982	--zExp;
				1983	if ( (sbits64) zSig < 0 ) {
				1984	zSig = aSig + bSig;
				1985	++zExp;
				1986	}
				1987	roundAndPack:
				1988	return roundAndPackFloat64( zSign, zExp, zSig );
				1989
				1990	}
				1991
				1992	/*
				1993	-------------------------------------------------------------------------------
				1994	Returns the result of subtracting the absolute values of the double-
				1995	precision floating-point values `a' and `b'. If `zSign' is true, the
				1996	difference is negated before being returned. `zSign' is ignored if the
				1997	result is a NaN. The subtraction is performed according to the IEC/IEEE
				1998	Standard for Binary Floating-point Arithmetic.
				1999	-------------------------------------------------------------------------------
				2000	*/
				2001	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
				2002	{
				2003	int16 aExp, bExp, zExp;
				2004	bits64 aSig, bSig, zSig;
				2005	int16 expDiff;
				2006
				2007	aSig = extractFloat64Frac( a );
				2008	aExp = extractFloat64Exp( a );
				2009	bSig = extractFloat64Frac( b );
				2010	bExp = extractFloat64Exp( b );
				2011	expDiff = aExp - bExp;
				2012	aSig <<= 10;
				2013	bSig <<= 10;
				2014	if ( 0 < expDiff ) goto aExpBigger;
				2015	if ( expDiff < 0 ) goto bExpBigger;
				2016	if ( aExp == 0x7FF ) {
				2017	if ( aSig \| bSig ) return propagateFloat64NaN( a, b );
				2018	float_raise( float_flag_invalid );
				2019	return float64_default_nan;
				2020	}
				2021	if ( aExp == 0 ) {
				2022	aExp = 1;
				2023	bExp = 1;
				2024	}
				2025	if ( bSig < aSig ) goto aBigger;
				2026	if ( aSig < bSig ) goto bBigger;
				2027	return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
				2028	bExpBigger:
				2029	if ( bExp == 0x7FF ) {
				2030	if ( bSig ) return propagateFloat64NaN( a, b );
				2031	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2032	}
				2033	if ( aExp == 0 ) {
				2034	++expDiff;
				2035	}
				2036	else {
				2037	aSig \|= LIT64( 0x4000000000000000 );
				2038	}
				2039	shift64RightJamming( aSig, - expDiff, &aSig );
				2040	bSig \|= LIT64( 0x4000000000000000 );
				2041	bBigger:
				2042	zSig = bSig - aSig;
				2043	zExp = bExp;
				2044	zSign ^= 1;
				2045	goto normalizeRoundAndPack;
				2046	aExpBigger:
				2047	if ( aExp == 0x7FF ) {
				2048	if ( aSig ) return propagateFloat64NaN( a, b );
				2049	return a;
				2050	}
				2051	if ( bExp == 0 ) {
				2052	--expDiff;
				2053	}
				2054	else {
				2055	bSig \|= LIT64( 0x4000000000000000 );
				2056	}
				2057	shift64RightJamming( bSig, expDiff, &bSig );
				2058	aSig \|= LIT64( 0x4000000000000000 );
				2059	aBigger:
				2060	zSig = aSig - bSig;
				2061	zExp = aExp;
				2062	normalizeRoundAndPack:
				2063	--zExp;
				2064	return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
				2065
				2066	}
				2067
				2068	/*
				2069	-------------------------------------------------------------------------------
				2070	Returns the result of adding the double-precision floating-point values `a'
				2071	and `b'. The operation is performed according to the IEC/IEEE Standard for
				2072	Binary Floating-point Arithmetic.
				2073	-------------------------------------------------------------------------------
				2074	*/
				2075	float64 float64_add( float64 a, float64 b )
				2076	{
				2077	flag aSign, bSign;
				2078
				2079	aSign = extractFloat64Sign( a );
				2080	bSign = extractFloat64Sign( b );
				2081	if ( aSign == bSign ) {
				2082	return addFloat64Sigs( a, b, aSign );
				2083	}
				2084	else {
				2085	return subFloat64Sigs( a, b, aSign );
				2086	}
				2087
				2088	}
				2089
				2090	/*
				2091	-------------------------------------------------------------------------------
				2092	Returns the result of subtracting the double-precision floating-point values
				2093	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2094	for Binary Floating-point Arithmetic.
				2095	-------------------------------------------------------------------------------
				2096	*/
				2097	float64 float64_sub( float64 a, float64 b )
				2098	{
				2099	flag aSign, bSign;
				2100
				2101	aSign = extractFloat64Sign( a );
				2102	bSign = extractFloat64Sign( b );
				2103	if ( aSign == bSign ) {
				2104	return subFloat64Sigs( a, b, aSign );
				2105	}
				2106	else {
				2107	return addFloat64Sigs( a, b, aSign );
				2108	}
				2109
				2110	}
				2111
				2112	/*
				2113	-------------------------------------------------------------------------------
				2114	Returns the result of multiplying the double-precision floating-point values
				2115	`a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2116	for Binary Floating-point Arithmetic.
				2117	-------------------------------------------------------------------------------
				2118	*/
				2119	float64 float64_mul( float64 a, float64 b )
				2120	{
				2121	flag aSign, bSign, zSign;
				2122	int16 aExp, bExp, zExp;
				2123	bits64 aSig, bSig, zSig0, zSig1;
				2124
				2125	aSig = extractFloat64Frac( a );
				2126	aExp = extractFloat64Exp( a );
				2127	aSign = extractFloat64Sign( a );
				2128	bSig = extractFloat64Frac( b );
				2129	bExp = extractFloat64Exp( b );
				2130	bSign = extractFloat64Sign( b );
				2131	zSign = aSign ^ bSign;
				2132	if ( aExp == 0x7FF ) {
				2133	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2134	return propagateFloat64NaN( a, b );
				2135	}
				2136	if ( ( bExp \| bSig ) == 0 ) {
				2137	float_raise( float_flag_invalid );
				2138	return float64_default_nan;
				2139	}
				2140	return packFloat64( zSign, 0x7FF, 0 );
				2141	}
				2142	if ( bExp == 0x7FF ) {
				2143	if ( bSig ) return propagateFloat64NaN( a, b );
				2144	if ( ( aExp \| aSig ) == 0 ) {
				2145	float_raise( float_flag_invalid );
				2146	return float64_default_nan;
				2147	}
				2148	return packFloat64( zSign, 0x7FF, 0 );
				2149	}
				2150	if ( aExp == 0 ) {
				2151	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2152	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2153	}
				2154	if ( bExp == 0 ) {
				2155	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2156	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2157	}
				2158	zExp = aExp + bExp - 0x3FF;
				2159	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2160	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2161	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2162	zSig0 \|= ( zSig1 != 0 );
				2163	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2164	zSig0 <<= 1;
				2165	--zExp;
				2166	}
				2167	return roundAndPackFloat64( zSign, zExp, zSig0 );
				2168
				2169	}
				2170
				2171	/*
				2172	-------------------------------------------------------------------------------
				2173	Returns the result of dividing the double-precision floating-point value `a'
				2174	by the corresponding value `b'. The operation is performed according to
				2175	the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2176	-------------------------------------------------------------------------------
				2177	*/
				2178	float64 float64_div( float64 a, float64 b )
				2179	{
				2180	flag aSign, bSign, zSign;
				2181	int16 aExp, bExp, zExp;
				2182	bits64 aSig, bSig, zSig;
				2183	bits64 rem0, rem1;
				2184	bits64 term0, term1;
				2185
				2186	aSig = extractFloat64Frac( a );
				2187	aExp = extractFloat64Exp( a );
				2188	aSign = extractFloat64Sign( a );
				2189	bSig = extractFloat64Frac( b );
				2190	bExp = extractFloat64Exp( b );
				2191	bSign = extractFloat64Sign( b );
				2192	zSign = aSign ^ bSign;
				2193	if ( aExp == 0x7FF ) {
				2194	if ( aSig ) return propagateFloat64NaN( a, b );
				2195	if ( bExp == 0x7FF ) {
				2196	if ( bSig ) return propagateFloat64NaN( a, b );
				2197	float_raise( float_flag_invalid );
				2198	return float64_default_nan;
				2199	}
				2200	return packFloat64( zSign, 0x7FF, 0 );
				2201	}
				2202	if ( bExp == 0x7FF ) {
				2203	if ( bSig ) return propagateFloat64NaN( a, b );
				2204	return packFloat64( zSign, 0, 0 );
				2205	}
				2206	if ( bExp == 0 ) {
				2207	if ( bSig == 0 ) {
				2208	if ( ( aExp \| aSig ) == 0 ) {
				2209	float_raise( float_flag_invalid );
				2210	return float64_default_nan;
				2211	}
				2212	float_raise( float_flag_divbyzero );
				2213	return packFloat64( zSign, 0x7FF, 0 );
				2214	}
				2215	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2216	}
				2217	if ( aExp == 0 ) {
				2218	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2219	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2220	}
				2221	zExp = aExp - bExp + 0x3FD;
				2222	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2223	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2224	if ( bSig <= ( aSig + aSig ) ) {
				2225	aSig >>= 1;
				2226	++zExp;
				2227	}
				2228	zSig = estimateDiv128To64( aSig, 0, bSig );
				2229	if ( ( zSig & 0x1FF ) <= 2 ) {
				2230	mul64To128( bSig, zSig, &term0, &term1 );
				2231	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2232	while ( (sbits64) rem0 < 0 ) {
				2233	--zSig;
				2234	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2235	}
				2236	zSig \|= ( rem1 != 0 );
				2237	}
				2238	return roundAndPackFloat64( zSign, zExp, zSig );
				2239
				2240	}
				2241
				2242	/*
				2243	-------------------------------------------------------------------------------
				2244	Returns the remainder of the double-precision floating-point value `a'
				2245	with respect to the corresponding value `b'. The operation is performed
				2246	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2247	-------------------------------------------------------------------------------
				2248	*/
				2249	float64 float64_rem( float64 a, float64 b )
				2250	{
				2251	flag aSign, bSign, zSign;
				2252	int16 aExp, bExp, expDiff;
				2253	bits64 aSig, bSig;
				2254	bits64 q, alternateASig;
				2255	sbits64 sigMean;
				2256
				2257	aSig = extractFloat64Frac( a );
				2258	aExp = extractFloat64Exp( a );
				2259	aSign = extractFloat64Sign( a );
				2260	bSig = extractFloat64Frac( b );
				2261	bExp = extractFloat64Exp( b );
				2262	bSign = extractFloat64Sign( b );
				2263	if ( aExp == 0x7FF ) {
				2264	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2265	return propagateFloat64NaN( a, b );
				2266	}
				2267	float_raise( float_flag_invalid );
				2268	return float64_default_nan;
				2269	}
				2270	if ( bExp == 0x7FF ) {
				2271	if ( bSig ) return propagateFloat64NaN( a, b );
				2272	return a;
				2273	}
				2274	if ( bExp == 0 ) {
				2275	if ( bSig == 0 ) {
				2276	float_raise( float_flag_invalid );
				2277	return float64_default_nan;
				2278	}
				2279	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2280	}
				2281	if ( aExp == 0 ) {
				2282	if ( aSig == 0 ) return a;
				2283	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2284	}
				2285	expDiff = aExp - bExp;
				2286	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2287	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2288	if ( expDiff < 0 ) {
				2289	if ( expDiff < -1 ) return a;
				2290	aSig >>= 1;
				2291	}
				2292	q = ( bSig <= aSig );
				2293	if ( q ) aSig -= bSig;
				2294	expDiff -= 64;
				2295	while ( 0 < expDiff ) {
				2296	q = estimateDiv128To64( aSig, 0, bSig );
				2297	q = ( 2 < q ) ? q - 2 : 0;
				2298	aSig = - ( ( bSig>>2 ) * q );
				2299	expDiff -= 62;
				2300	}
				2301	expDiff += 64;
				2302	if ( 0 < expDiff ) {
				2303	q = estimateDiv128To64( aSig, 0, bSig );
				2304	q = ( 2 < q ) ? q - 2 : 0;
				2305	q >>= 64 - expDiff;
				2306	bSig >>= 2;
				2307	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2308	}
				2309	else {
				2310	aSig >>= 2;
				2311	bSig >>= 2;
				2312	}
				2313	do {
				2314	alternateASig = aSig;
				2315	++q;
				2316	aSig -= bSig;
				2317	} while ( 0 <= (sbits64) aSig );
				2318	sigMean = aSig + alternateASig;
				2319	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2320	aSig = alternateASig;
				2321	}
				2322	zSign = ( (sbits64) aSig < 0 );
				2323	if ( zSign ) aSig = - aSig;
				2324	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
				2325
				2326	}
				2327
				2328	/*
				2329	-------------------------------------------------------------------------------
				2330	Returns the square root of the double-precision floating-point value `a'.
				2331	The operation is performed according to the IEC/IEEE Standard for Binary
				2332	Floating-point Arithmetic.
				2333	-------------------------------------------------------------------------------
				2334	*/
				2335	float64 float64_sqrt( float64 a )
				2336	{
				2337	flag aSign;
				2338	int16 aExp, zExp;
				2339	bits64 aSig, zSig;
				2340	bits64 rem0, rem1, term0, term1; //, shiftedRem;
				2341	//float64 z;
				2342
				2343	aSig = extractFloat64Frac( a );
				2344	aExp = extractFloat64Exp( a );
				2345	aSign = extractFloat64Sign( a );
				2346	if ( aExp == 0x7FF ) {
				2347	if ( aSig ) return propagateFloat64NaN( a, a );
				2348	if ( ! aSign ) return a;
				2349	float_raise( float_flag_invalid );
				2350	return float64_default_nan;
				2351	}
				2352	if ( aSign ) {
				2353	if ( ( aExp \| aSig ) == 0 ) return a;
				2354	float_raise( float_flag_invalid );
				2355	return float64_default_nan;
				2356	}
				2357	if ( aExp == 0 ) {
				2358	if ( aSig == 0 ) return 0;
				2359	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2360	}
				2361	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				2362	aSig \|= LIT64( 0x0010000000000000 );
				2363	zSig = estimateSqrt32( aExp, aSig>>21 );
				2364	zSig <<= 31;
				2365	aSig <<= 9 - ( aExp & 1 );
				2366	zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
				2367	if ( ( zSig & 0x3FF ) <= 5 ) {
				2368	if ( zSig < 2 ) {
				2369	zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
				2370	}
				2371	else {
				2372	aSig <<= 2;
				2373	mul64To128( zSig, zSig, &term0, &term1 );
				2374	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2375	while ( (sbits64) rem0 < 0 ) {
				2376	--zSig;
				2377	shortShift128Left( 0, zSig, 1, &term0, &term1 );
				2378	term1 \|= 1;
				2379	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				2380	}
				2381	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				2382	}
				2383	}
				2384	shift64RightJamming( zSig, 1, &zSig );
				2385	return roundAndPackFloat64( 0, zExp, zSig );
				2386
				2387	}
				2388
				2389	/*
				2390	-------------------------------------------------------------------------------
				2391	Returns 1 if the double-precision floating-point value `a' is equal to the
				2392	corresponding value `b', and 0 otherwise. The comparison is performed
				2393	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2394	-------------------------------------------------------------------------------
				2395	*/
				2396	flag float64_eq( float64 a, float64 b )
				2397	{
				2398
				2399	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2400	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2401	) {
				2402	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2403	float_raise( float_flag_invalid );
				2404	}
				2405	return 0;
				2406	}
				2407	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2408
				2409	}
				2410
				2411	/*
				2412	-------------------------------------------------------------------------------
				2413	Returns 1 if the double-precision floating-point value `a' is less than or
				2414	equal to the corresponding value `b', and 0 otherwise. The comparison is
				2415	performed according to the IEC/IEEE Standard for Binary Floating-point
				2416	Arithmetic.
				2417	-------------------------------------------------------------------------------
				2418	*/
				2419	flag float64_le( float64 a, float64 b )
				2420	{
				2421	flag aSign, bSign;
				2422
				2423	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2424	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2425	) {
				2426	float_raise( float_flag_invalid );
				2427	return 0;
				2428	}
				2429	aSign = extractFloat64Sign( a );
				2430	bSign = extractFloat64Sign( b );
				2431	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2432	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2433
				2434	}
				2435
				2436	/*
				2437	-------------------------------------------------------------------------------
				2438	Returns 1 if the double-precision floating-point value `a' is less than
				2439	the corresponding value `b', and 0 otherwise. The comparison is performed
				2440	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2441	-------------------------------------------------------------------------------
				2442	*/
				2443	flag float64_lt( float64 a, float64 b )
				2444	{
				2445	flag aSign, bSign;
				2446
				2447	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2448	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2449	) {
				2450	float_raise( float_flag_invalid );
				2451	return 0;
				2452	}
				2453	aSign = extractFloat64Sign( a );
				2454	bSign = extractFloat64Sign( b );
				2455	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2456	return ( a != b ) && ( aSign ^ ( a < b ) );
				2457
				2458	}
				2459
				2460	/*
				2461	-------------------------------------------------------------------------------
				2462	Returns 1 if the double-precision floating-point value `a' is equal to the
				2463	corresponding value `b', and 0 otherwise. The invalid exception is raised
				2464	if either operand is a NaN. Otherwise, the comparison is performed
				2465	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2466	-------------------------------------------------------------------------------
				2467	*/
				2468	flag float64_eq_signaling( float64 a, float64 b )
				2469	{
				2470
				2471	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2472	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2473	) {
				2474	float_raise( float_flag_invalid );
				2475	return 0;
				2476	}
				2477	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2478
				2479	}
				2480
				2481	/*
				2482	-------------------------------------------------------------------------------
				2483	Returns 1 if the double-precision floating-point value `a' is less than or
				2484	equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2485	cause an exception. Otherwise, the comparison is performed according to the
				2486	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2487	-------------------------------------------------------------------------------
				2488	*/
				2489	flag float64_le_quiet( float64 a, float64 b )
				2490	{
				2491	flag aSign, bSign;
				2492	//int16 aExp, bExp;
				2493
				2494	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2495	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2496	) {
				2497	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2498	float_raise( float_flag_invalid );
				2499	}
				2500	return 0;
				2501	}
				2502	aSign = extractFloat64Sign( a );
				2503	bSign = extractFloat64Sign( b );
				2504	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2505	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2506
				2507	}
				2508
				2509	/*
				2510	-------------------------------------------------------------------------------
				2511	Returns 1 if the double-precision floating-point value `a' is less than
				2512	the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2513	exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2514	Standard for Binary Floating-point Arithmetic.
				2515	-------------------------------------------------------------------------------
				2516	*/
				2517	flag float64_lt_quiet( float64 a, float64 b )
				2518	{
				2519	flag aSign, bSign;
				2520
				2521	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2522	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2523	) {
				2524	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2525	float_raise( float_flag_invalid );
				2526	}
				2527	return 0;
				2528	}
				2529	aSign = extractFloat64Sign( a );
				2530	bSign = extractFloat64Sign( b );
				2531	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				2532	return ( a != b ) && ( aSign ^ ( a < b ) );
				2533
				2534	}
				2535
				2536	#ifdef FLOATX80
				2537
				2538	/*
				2539	-------------------------------------------------------------------------------
				2540	Returns the result of converting the extended double-precision floating-
				2541	point value `a' to the 32-bit two's complement integer format. The
				2542	conversion is performed according to the IEC/IEEE Standard for Binary
				2543	Floating-point Arithmetic---which means in particular that the conversion
				2544	is rounded according to the current rounding mode. If `a' is a NaN, the
				2545	largest positive integer is returned. Otherwise, if the conversion
				2546	overflows, the largest integer with the same sign as `a' is returned.
				2547	-------------------------------------------------------------------------------
				2548	*/
				2549	int32 floatx80_to_int32( floatx80 a )
				2550	{
				2551	flag aSign;
				2552	int32 aExp, shiftCount;
				2553	bits64 aSig;
				2554
				2555	aSig = extractFloatx80Frac( a );
				2556	aExp = extractFloatx80Exp( a );
				2557	aSign = extractFloatx80Sign( a );
				2558	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2559	shiftCount = 0x4037 - aExp;
				2560	if ( shiftCount <= 0 ) shiftCount = 1;
				2561	shift64RightJamming( aSig, shiftCount, &aSig );
				2562	return roundAndPackInt32( aSign, aSig );
				2563
				2564	}
				2565
				2566	/*
				2567	-------------------------------------------------------------------------------
				2568	Returns the result of converting the extended double-precision floating-
				2569	point value `a' to the 32-bit two's complement integer format. The
				2570	conversion is performed according to the IEC/IEEE Standard for Binary
				2571	Floating-point Arithmetic, except that the conversion is always rounded
				2572	toward zero. If `a' is a NaN, the largest positive integer is returned.
				2573	Otherwise, if the conversion overflows, the largest integer with the same
				2574	sign as `a' is returned.
				2575	-------------------------------------------------------------------------------
				2576	*/
				2577	int32 floatx80_to_int32_round_to_zero( floatx80 a )
				2578	{
				2579	flag aSign;
				2580	int32 aExp, shiftCount;
				2581	bits64 aSig, savedASig;
				2582	int32 z;
				2583
				2584	aSig = extractFloatx80Frac( a );
				2585	aExp = extractFloatx80Exp( a );
				2586	aSign = extractFloatx80Sign( a );
				2587	shiftCount = 0x403E - aExp;
				2588	if ( shiftCount < 32 ) {
				2589	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				2590	goto invalid;
				2591	}
				2592	else if ( 63 < shiftCount ) {
				2593	if ( aExp \|\| aSig ) float_exception_flags \|= float_flag_inexact;
				2594	return 0;
				2595	}
				2596	savedASig = aSig;
				2597	aSig >>= shiftCount;
				2598	z = aSig;
				2599	if ( aSign ) z = - z;
				2600	if ( ( z < 0 ) ^ aSign ) {
				2601	invalid:
				2602	float_exception_flags \|= float_flag_invalid;
				2603	return aSign ? 0x80000000 : 0x7FFFFFFF;
				2604	}
				2605	if ( ( aSig<<shiftCount ) != savedASig ) {
				2606	float_exception_flags \|= float_flag_inexact;
				2607	}
				2608	return z;
				2609
				2610	}
				2611
				2612	/*
				2613	-------------------------------------------------------------------------------
				2614	Returns the result of converting the extended double-precision floating-
				2615	point value `a' to the single-precision floating-point format. The
				2616	conversion is performed according to the IEC/IEEE Standard for Binary
				2617	Floating-point Arithmetic.
				2618	-------------------------------------------------------------------------------
				2619	*/
				2620	float32 floatx80_to_float32( floatx80 a )
				2621	{
				2622	flag aSign;
				2623	int32 aExp;
				2624	bits64 aSig;
				2625
				2626	aSig = extractFloatx80Frac( a );
				2627	aExp = extractFloatx80Exp( a );
				2628	aSign = extractFloatx80Sign( a );
				2629	if ( aExp == 0x7FFF ) {
				2630	if ( (bits64) ( aSig<<1 ) ) {
				2631	return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
				2632	}
				2633	return packFloat32( aSign, 0xFF, 0 );
				2634	}
				2635	shift64RightJamming( aSig, 33, &aSig );
				2636	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				2637	return roundAndPackFloat32( aSign, aExp, aSig );
				2638
				2639	}
				2640
				2641	/*
				2642	-------------------------------------------------------------------------------
				2643	Returns the result of converting the extended double-precision floating-
				2644	point value `a' to the double-precision floating-point format. The
				2645	conversion is performed according to the IEC/IEEE Standard for Binary
				2646	Floating-point Arithmetic.
				2647	-------------------------------------------------------------------------------
				2648	*/
				2649	float64 floatx80_to_float64( floatx80 a )
				2650	{
				2651	flag aSign;
				2652	int32 aExp;
				2653	bits64 aSig, zSig;
				2654
				2655	aSig = extractFloatx80Frac( a );
				2656	aExp = extractFloatx80Exp( a );
				2657	aSign = extractFloatx80Sign( a );
				2658	if ( aExp == 0x7FFF ) {
				2659	if ( (bits64) ( aSig<<1 ) ) {
				2660	return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
				2661	}
				2662	return packFloat64( aSign, 0x7FF, 0 );
				2663	}
				2664	shift64RightJamming( aSig, 1, &zSig );
				2665	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				2666	return roundAndPackFloat64( aSign, aExp, zSig );
				2667
				2668	}
				2669
				2670	/*
				2671	-------------------------------------------------------------------------------
				2672	Rounds the extended double-precision floating-point value `a' to an integer,
				2673	and returns the result as an extended quadruple-precision floating-point
				2674	value. The operation is performed according to the IEC/IEEE Standard for
				2675	Binary Floating-point Arithmetic.
				2676	-------------------------------------------------------------------------------
				2677	*/
				2678	floatx80 floatx80_round_to_int( floatx80 a )
				2679	{
				2680	flag aSign;
				2681	int32 aExp;
				2682	bits64 lastBitMask, roundBitsMask;
				2683	int8 roundingMode;
				2684	floatx80 z;
				2685
				2686	aExp = extractFloatx80Exp( a );
				2687	if ( 0x403E <= aExp ) {
				2688	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				2689	return propagateFloatx80NaN( a, a );
				2690	}
				2691	return a;
				2692	}
				2693	if ( aExp <= 0x3FFE ) {
				2694	if ( ( aExp == 0 )
				2695	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				2696	return a;
				2697	}
				2698	float_exception_flags \|= float_flag_inexact;
				2699	aSign = extractFloatx80Sign( a );
				2700	switch ( float_rounding_mode ) {
				2701	case float_round_nearest_even:
				2702	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				2703	) {
				2704	return
				2705	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2706	}
				2707	break;
				2708	case float_round_down:
				2709	return
				2710	aSign ?
				2711	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				2712	: packFloatx80( 0, 0, 0 );
				2713	case float_round_up:
				2714	return
				2715	aSign ? packFloatx80( 1, 0, 0 )
				2716	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				2717	}
				2718	return packFloatx80( aSign, 0, 0 );
				2719	}
				2720	lastBitMask = 1;
				2721	lastBitMask <<= 0x403E - aExp;
				2722	roundBitsMask = lastBitMask - 1;
				2723	z = a;
				2724	roundingMode = float_rounding_mode;
				2725	if ( roundingMode == float_round_nearest_even ) {
				2726	z.low += lastBitMask>>1;
				2727	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				2728	}
				2729	else if ( roundingMode != float_round_to_zero ) {
				2730	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				2731	z.low += roundBitsMask;
				2732	}
				2733	}
				2734	z.low &= ~ roundBitsMask;
				2735	if ( z.low == 0 ) {
				2736	++z.high;
				2737	z.low = LIT64( 0x8000000000000000 );
				2738	}
				2739	if ( z.low != a.low ) float_exception_flags \|= float_flag_inexact;
				2740	return z;
				2741
				2742	}
				2743
				2744	/*
				2745	-------------------------------------------------------------------------------
				2746	Returns the result of adding the absolute values of the extended double-
				2747	precision floating-point values `a' and `b'. If `zSign' is true, the sum is
				2748	negated before being returned. `zSign' is ignored if the result is a NaN.
				2749	The addition is performed according to the IEC/IEEE Standard for Binary
				2750	Floating-point Arithmetic.
				2751	-------------------------------------------------------------------------------
				2752	*/
				2753	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2754	{
				2755	int32 aExp, bExp, zExp;
				2756	bits64 aSig, bSig, zSig0, zSig1;
				2757	int32 expDiff;
				2758
				2759	aSig = extractFloatx80Frac( a );
				2760	aExp = extractFloatx80Exp( a );
				2761	bSig = extractFloatx80Frac( b );
				2762	bExp = extractFloatx80Exp( b );
				2763	expDiff = aExp - bExp;
				2764	if ( 0 < expDiff ) {
				2765	if ( aExp == 0x7FFF ) {
				2766	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2767	return a;
				2768	}
				2769	if ( bExp == 0 ) --expDiff;
				2770	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2771	zExp = aExp;
				2772	}
				2773	else if ( expDiff < 0 ) {
				2774	if ( bExp == 0x7FFF ) {
				2775	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2776	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2777	}
				2778	if ( aExp == 0 ) ++expDiff;
				2779	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2780	zExp = bExp;
				2781	}
				2782	else {
				2783	if ( aExp == 0x7FFF ) {
				2784	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2785	return propagateFloatx80NaN( a, b );
				2786	}
				2787	return a;
				2788	}
				2789	zSig1 = 0;
				2790	zSig0 = aSig + bSig;
				2791	if ( aExp == 0 ) {
				2792	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				2793	goto roundAndPack;
				2794	}
				2795	zExp = aExp;
				2796	goto shiftRight1;
				2797	}
				2798
				2799	zSig0 = aSig + bSig;
				2800
				2801	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				2802	shiftRight1:
				2803	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2804	zSig0 \|= LIT64( 0x8000000000000000 );
				2805	++zExp;
				2806	roundAndPack:
				2807	return
				2808	roundAndPackFloatx80(
				2809	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2810
				2811	}
				2812
				2813	/*
				2814	-------------------------------------------------------------------------------
				2815	Returns the result of subtracting the absolute values of the extended
				2816	double-precision floating-point values `a' and `b'. If `zSign' is true,
				2817	the difference is negated before being returned. `zSign' is ignored if the
				2818	result is a NaN. The subtraction is performed according to the IEC/IEEE
				2819	Standard for Binary Floating-point Arithmetic.
				2820	-------------------------------------------------------------------------------
				2821	*/
				2822	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
				2823	{
				2824	int32 aExp, bExp, zExp;
				2825	bits64 aSig, bSig, zSig0, zSig1;
				2826	int32 expDiff;
				2827	floatx80 z;
				2828
				2829	aSig = extractFloatx80Frac( a );
				2830	aExp = extractFloatx80Exp( a );
				2831	bSig = extractFloatx80Frac( b );
				2832	bExp = extractFloatx80Exp( b );
				2833	expDiff = aExp - bExp;
				2834	if ( 0 < expDiff ) goto aExpBigger;
				2835	if ( expDiff < 0 ) goto bExpBigger;
				2836	if ( aExp == 0x7FFF ) {
				2837	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				2838	return propagateFloatx80NaN( a, b );
				2839	}
				2840	float_raise( float_flag_invalid );
				2841	z.low = floatx80_default_nan_low;
				2842	z.high = floatx80_default_nan_high;
				2843	return z;
				2844	}
				2845	if ( aExp == 0 ) {
				2846	aExp = 1;
				2847	bExp = 1;
				2848	}
				2849	zSig1 = 0;
				2850	if ( bSig < aSig ) goto aBigger;
				2851	if ( aSig < bSig ) goto bBigger;
				2852	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
				2853	bExpBigger:
				2854	if ( bExp == 0x7FFF ) {
				2855	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2856	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2857	}
				2858	if ( aExp == 0 ) ++expDiff;
				2859	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				2860	bBigger:
				2861	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				2862	zExp = bExp;
				2863	zSign ^= 1;
				2864	goto normalizeRoundAndPack;
				2865	aExpBigger:
				2866	if ( aExp == 0x7FFF ) {
				2867	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2868	return a;
				2869	}
				2870	if ( bExp == 0 ) --expDiff;
				2871	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				2872	aBigger:
				2873	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				2874	zExp = aExp;
				2875	normalizeRoundAndPack:
				2876	return
				2877	normalizeRoundAndPackFloatx80(
				2878	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2879
				2880	}
				2881
				2882	/*
				2883	-------------------------------------------------------------------------------
				2884	Returns the result of adding the extended double-precision floating-point
				2885	values `a' and `b'. The operation is performed according to the IEC/IEEE
				2886	Standard for Binary Floating-point Arithmetic.
				2887	-------------------------------------------------------------------------------
				2888	*/
				2889	floatx80 floatx80_add( floatx80 a, floatx80 b )
				2890	{
				2891	flag aSign, bSign;
				2892
				2893	aSign = extractFloatx80Sign( a );
				2894	bSign = extractFloatx80Sign( b );
				2895	if ( aSign == bSign ) {
				2896	return addFloatx80Sigs( a, b, aSign );
				2897	}
				2898	else {
				2899	return subFloatx80Sigs( a, b, aSign );
				2900	}
				2901
				2902	}
				2903
				2904	/*
				2905	-------------------------------------------------------------------------------
				2906	Returns the result of subtracting the extended double-precision floating-
				2907	point values `a' and `b'. The operation is performed according to the
				2908	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2909	-------------------------------------------------------------------------------
				2910	*/
				2911	floatx80 floatx80_sub( floatx80 a, floatx80 b )
				2912	{
				2913	flag aSign, bSign;
				2914
				2915	aSign = extractFloatx80Sign( a );
				2916	bSign = extractFloatx80Sign( b );
				2917	if ( aSign == bSign ) {
				2918	return subFloatx80Sigs( a, b, aSign );
				2919	}
				2920	else {
				2921	return addFloatx80Sigs( a, b, aSign );
				2922	}
				2923
				2924	}
				2925
				2926	/*
				2927	-------------------------------------------------------------------------------
				2928	Returns the result of multiplying the extended double-precision floating-
				2929	point values `a' and `b'. The operation is performed according to the
				2930	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2931	-------------------------------------------------------------------------------
				2932	*/
				2933	floatx80 floatx80_mul( floatx80 a, floatx80 b )
				2934	{
				2935	flag aSign, bSign, zSign;
				2936	int32 aExp, bExp, zExp;
				2937	bits64 aSig, bSig, zSig0, zSig1;
				2938	floatx80 z;
				2939
				2940	aSig = extractFloatx80Frac( a );
				2941	aExp = extractFloatx80Exp( a );
				2942	aSign = extractFloatx80Sign( a );
				2943	bSig = extractFloatx80Frac( b );
				2944	bExp = extractFloatx80Exp( b );
				2945	bSign = extractFloatx80Sign( b );
				2946	zSign = aSign ^ bSign;
				2947	if ( aExp == 0x7FFF ) {
				2948	if ( (bits64) ( aSig<<1 )
				2949	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				2950	return propagateFloatx80NaN( a, b );
				2951	}
				2952	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				2953	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2954	}
				2955	if ( bExp == 0x7FFF ) {
				2956	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				2957	if ( ( aExp \| aSig ) == 0 ) {
				2958	invalid:
				2959	float_raise( float_flag_invalid );
				2960	z.low = floatx80_default_nan_low;
				2961	z.high = floatx80_default_nan_high;
				2962	return z;
				2963	}
				2964	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2965	}
				2966	if ( aExp == 0 ) {
				2967	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2968	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				2969	}
				2970	if ( bExp == 0 ) {
				2971	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				2972	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				2973	}
				2974	zExp = aExp + bExp - 0x3FFE;
				2975	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2976	if ( 0 < (sbits64) zSig0 ) {
				2977	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				2978	--zExp;
				2979	}
				2980	return
				2981	roundAndPackFloatx80(
				2982	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				2983
				2984	}
				2985
				2986	/*
				2987	-------------------------------------------------------------------------------
				2988	Returns the result of dividing the extended double-precision floating-point
				2989	value `a' by the corresponding value `b'. The operation is performed
				2990	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				2991	-------------------------------------------------------------------------------
				2992	*/
				2993	floatx80 floatx80_div( floatx80 a, floatx80 b )
				2994	{
				2995	flag aSign, bSign, zSign;
				2996	int32 aExp, bExp, zExp;
				2997	bits64 aSig, bSig, zSig0, zSig1;
				2998	bits64 rem0, rem1, rem2, term0, term1, term2;
				2999	floatx80 z;
				3000
				3001	aSig = extractFloatx80Frac( a );
				3002	aExp = extractFloatx80Exp( a );
				3003	aSign = extractFloatx80Sign( a );
				3004	bSig = extractFloatx80Frac( b );
				3005	bExp = extractFloatx80Exp( b );
				3006	bSign = extractFloatx80Sign( b );
				3007	zSign = aSign ^ bSign;
				3008	if ( aExp == 0x7FFF ) {
				3009	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3010	if ( bExp == 0x7FFF ) {
				3011	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3012	goto invalid;
				3013	}
				3014	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3015	}
				3016	if ( bExp == 0x7FFF ) {
				3017	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3018	return packFloatx80( zSign, 0, 0 );
				3019	}
				3020	if ( bExp == 0 ) {
				3021	if ( bSig == 0 ) {
				3022	if ( ( aExp \| aSig ) == 0 ) {
				3023	invalid:
				3024	float_raise( float_flag_invalid );
				3025	z.low = floatx80_default_nan_low;
				3026	z.high = floatx80_default_nan_high;
				3027	return z;
				3028	}
				3029	float_raise( float_flag_divbyzero );
				3030	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3031	}
				3032	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3033	}
				3034	if ( aExp == 0 ) {
				3035	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3036	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3037	}
				3038	zExp = aExp - bExp + 0x3FFE;
				3039	rem1 = 0;
				3040	if ( bSig <= aSig ) {
				3041	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3042	++zExp;
				3043	}
				3044	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3045	mul64To128( bSig, zSig0, &term0, &term1 );
				3046	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3047	while ( (sbits64) rem0 < 0 ) {
				3048	--zSig0;
				3049	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3050	}
				3051	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3052	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3053	mul64To128( bSig, zSig1, &term1, &term2 );
				3054	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3055	while ( (sbits64) rem1 < 0 ) {
				3056	--zSig1;
				3057	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3058	}
				3059	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3060	}
				3061	return
				3062	roundAndPackFloatx80(
				3063	floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
				3064
				3065	}
				3066
				3067	/*
				3068	-------------------------------------------------------------------------------
				3069	Returns the remainder of the extended double-precision floating-point value
				3070	`a' with respect to the corresponding value `b'. The operation is performed
				3071	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3072	-------------------------------------------------------------------------------
				3073	*/
				3074	floatx80 floatx80_rem( floatx80 a, floatx80 b )
				3075	{
				3076	flag aSign, bSign, zSign;
				3077	int32 aExp, bExp, expDiff;
				3078	bits64 aSig0, aSig1, bSig;
				3079	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3080	floatx80 z;
				3081
				3082	aSig0 = extractFloatx80Frac( a );
				3083	aExp = extractFloatx80Exp( a );
				3084	aSign = extractFloatx80Sign( a );
				3085	bSig = extractFloatx80Frac( b );
				3086	bExp = extractFloatx80Exp( b );
				3087	bSign = extractFloatx80Sign( b );
				3088	if ( aExp == 0x7FFF ) {
				3089	if ( (bits64) ( aSig0<<1 )
				3090	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3091	return propagateFloatx80NaN( a, b );
				3092	}
				3093	goto invalid;
				3094	}
				3095	if ( bExp == 0x7FFF ) {
				3096	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
				3097	return a;
				3098	}
				3099	if ( bExp == 0 ) {
				3100	if ( bSig == 0 ) {
				3101	invalid:
				3102	float_raise( float_flag_invalid );
				3103	z.low = floatx80_default_nan_low;
				3104	z.high = floatx80_default_nan_high;
				3105	return z;
				3106	}
				3107	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3108	}
				3109	if ( aExp == 0 ) {
				3110	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3111	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3112	}
				3113	bSig \|= LIT64( 0x8000000000000000 );
				3114	zSign = aSign;
				3115	expDiff = aExp - bExp;
				3116	aSig1 = 0;
				3117	if ( expDiff < 0 ) {
				3118	if ( expDiff < -1 ) return a;
				3119	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3120	expDiff = 0;
				3121	}
				3122	q = ( bSig <= aSig0 );
				3123	if ( q ) aSig0 -= bSig;
				3124	expDiff -= 64;
				3125	while ( 0 < expDiff ) {
				3126	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3127	q = ( 2 < q ) ? q - 2 : 0;
				3128	mul64To128( bSig, q, &term0, &term1 );
				3129	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3130	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3131	expDiff -= 62;
				3132	}
				3133	expDiff += 64;
				3134	if ( 0 < expDiff ) {
				3135	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3136	q = ( 2 < q ) ? q - 2 : 0;
				3137	q >>= 64 - expDiff;
				3138	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3139	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3140	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3141	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3142	++q;
				3143	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3144	}
				3145	}
				3146	else {
				3147	term1 = 0;
				3148	term0 = bSig;
				3149	}
				3150	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3151	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3152	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3153	&& ( q & 1 ) )
				3154	) {
				3155	aSig0 = alternateASig0;
				3156	aSig1 = alternateASig1;
				3157	zSign = ! zSign;
				3158	}
				3159	return
				3160	normalizeRoundAndPackFloatx80(
				3161	80, zSign, bExp + expDiff, aSig0, aSig1 );
				3162
				3163	}
				3164
				3165	/*
				3166	-------------------------------------------------------------------------------
				3167	Returns the square root of the extended double-precision floating-point
				3168	value `a'. The operation is performed according to the IEC/IEEE Standard
				3169	for Binary Floating-point Arithmetic.
				3170	-------------------------------------------------------------------------------
				3171	*/
				3172	floatx80 floatx80_sqrt( floatx80 a )
				3173	{
				3174	flag aSign;
				3175	int32 aExp, zExp;
				3176	bits64 aSig0, aSig1, zSig0, zSig1;
				3177	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3178	bits64 shiftedRem0, shiftedRem1;
				3179	floatx80 z;
				3180
				3181	aSig0 = extractFloatx80Frac( a );
				3182	aExp = extractFloatx80Exp( a );
				3183	aSign = extractFloatx80Sign( a );
				3184	if ( aExp == 0x7FFF ) {
				3185	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
				3186	if ( ! aSign ) return a;
				3187	goto invalid;
				3188	}
				3189	if ( aSign ) {
				3190	if ( ( aExp \| aSig0 ) == 0 ) return a;
				3191	invalid:
				3192	float_raise( float_flag_invalid );
				3193	z.low = floatx80_default_nan_low;
				3194	z.high = floatx80_default_nan_high;
				3195	return z;
				3196	}
				3197	if ( aExp == 0 ) {
				3198	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				3199	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3200	}
				3201	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				3202	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				3203	zSig0 <<= 31;
				3204	aSig1 = 0;
				3205	shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
				3206	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
				3207	if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
				3208	shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
				3209	mul64To128( zSig0, zSig0, &term0, &term1 );
				3210	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				3211	while ( (sbits64) rem0 < 0 ) {
				3212	--zSig0;
				3213	shortShift128Left( 0, zSig0, 1, &term0, &term1 );
				3214	term1 \|= 1;
				3215	add128( rem0, rem1, term0, term1, &rem0, &rem1 );
				3216	}
				3217	shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
				3218	zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
				3219	if ( (bits64) ( zSig1<<1 ) <= 10 ) {
				3220	if ( zSig1 == 0 ) zSig1 = 1;
				3221	mul64To128( zSig0, zSig1, &term1, &term2 );
				3222	shortShift128Left( term1, term2, 1, &term1, &term2 );
				3223	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3224	mul64To128( zSig1, zSig1, &term2, &term3 );
				3225	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				3226	while ( (sbits64) rem1 < 0 ) {
				3227	--zSig1;
				3228	shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
				3229	term3 \|= 1;
				3230	add192(
				3231	rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
				3232	}
				3233	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				3234	}
				3235	return
				3236	roundAndPackFloatx80(
				3237	floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
				3238
				3239	}
				3240
				3241	/*
				3242	-------------------------------------------------------------------------------
				3243	Returns 1 if the extended double-precision floating-point value `a' is
				3244	equal to the corresponding value `b', and 0 otherwise. The comparison is
				3245	performed according to the IEC/IEEE Standard for Binary Floating-point
				3246	Arithmetic.
				3247	-------------------------------------------------------------------------------
				3248	*/
				3249	flag floatx80_eq( floatx80 a, floatx80 b )
				3250	{
				3251
				3252	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3253	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3254	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3255	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3256	) {
				3257	if ( floatx80_is_signaling_nan( a )
				3258	\|\| floatx80_is_signaling_nan( b ) ) {
				3259	float_raise( float_flag_invalid );
				3260	}
				3261	return 0;
				3262	}
				3263	return
				3264	( a.low == b.low )
				3265	&& ( ( a.high == b.high )
				3266	\|\| ( ( a.low == 0 )
				3267	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3268	);
				3269
				3270	}
				3271
				3272	/*
				3273	-------------------------------------------------------------------------------
				3274	Returns 1 if the extended double-precision floating-point value `a' is
				3275	less than or equal to the corresponding value `b', and 0 otherwise. The
				3276	comparison is performed according to the IEC/IEEE Standard for Binary
				3277	Floating-point Arithmetic.
				3278	-------------------------------------------------------------------------------
				3279	*/
				3280	flag floatx80_le( floatx80 a, floatx80 b )
				3281	{
				3282	flag aSign, bSign;
				3283
				3284	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3285	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3286	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3287	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3288	) {
				3289	float_raise( float_flag_invalid );
				3290	return 0;
				3291	}
				3292	aSign = extractFloatx80Sign( a );
				3293	bSign = extractFloatx80Sign( b );
				3294	if ( aSign != bSign ) {
				3295	return
				3296	aSign
				3297	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3298	== 0 );
				3299	}
				3300	return
				3301	aSign ? le128( b.high, b.low, a.high, a.low )
				3302	: le128( a.high, a.low, b.high, b.low );
				3303
				3304	}
				3305
				3306	/*
				3307	-------------------------------------------------------------------------------
				3308	Returns 1 if the extended double-precision floating-point value `a' is
				3309	less than the corresponding value `b', and 0 otherwise. The comparison
				3310	is performed according to the IEC/IEEE Standard for Binary Floating-point
				3311	Arithmetic.
				3312	-------------------------------------------------------------------------------
				3313	*/
				3314	flag floatx80_lt( floatx80 a, floatx80 b )
				3315	{
				3316	flag aSign, bSign;
				3317
				3318	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3319	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3320	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3321	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3322	) {
				3323	float_raise( float_flag_invalid );
				3324	return 0;
				3325	}
				3326	aSign = extractFloatx80Sign( a );
				3327	bSign = extractFloatx80Sign( b );
				3328	if ( aSign != bSign ) {
				3329	return
				3330	aSign
				3331	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3332	!= 0 );
				3333	}
				3334	return
				3335	aSign ? lt128( b.high, b.low, a.high, a.low )
				3336	: lt128( a.high, a.low, b.high, b.low );
				3337
				3338	}
				3339
				3340	/*
				3341	-------------------------------------------------------------------------------
				3342	Returns 1 if the extended double-precision floating-point value `a' is equal
				3343	to the corresponding value `b', and 0 otherwise. The invalid exception is
				3344	raised if either operand is a NaN. Otherwise, the comparison is performed
				3345	according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3346	-------------------------------------------------------------------------------
				3347	*/
				3348	flag floatx80_eq_signaling( floatx80 a, floatx80 b )
				3349	{
				3350
				3351	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3352	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3353	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3354	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3355	) {
				3356	float_raise( float_flag_invalid );
				3357	return 0;
				3358	}
				3359	return
				3360	( a.low == b.low )
				3361	&& ( ( a.high == b.high )
				3362	\|\| ( ( a.low == 0 )
				3363	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3364	);
				3365
				3366	}
				3367
				3368	/*
				3369	-------------------------------------------------------------------------------
				3370	Returns 1 if the extended double-precision floating-point value `a' is less
				3371	than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				3372	do not cause an exception. Otherwise, the comparison is performed according
				3373	to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3374	-------------------------------------------------------------------------------
				3375	*/
				3376	flag floatx80_le_quiet( floatx80 a, floatx80 b )
				3377	{
				3378	flag aSign, bSign;
				3379
				3380	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3381	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3382	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3383	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3384	) {
				3385	if ( floatx80_is_signaling_nan( a )
				3386	\|\| floatx80_is_signaling_nan( b ) ) {
				3387	float_raise( float_flag_invalid );
				3388	}
				3389	return 0;
				3390	}
				3391	aSign = extractFloatx80Sign( a );
				3392	bSign = extractFloatx80Sign( b );
				3393	if ( aSign != bSign ) {
				3394	return
				3395	aSign
				3396	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3397	== 0 );
				3398	}
				3399	return
				3400	aSign ? le128( b.high, b.low, a.high, a.low )
				3401	: le128( a.high, a.low, b.high, b.low );
				3402
				3403	}
				3404
				3405	/*
				3406	-------------------------------------------------------------------------------
				3407	Returns 1 if the extended double-precision floating-point value `a' is less
				3408	than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				3409	an exception. Otherwise, the comparison is performed according to the
				3410	IEC/IEEE Standard for Binary Floating-point Arithmetic.
				3411	-------------------------------------------------------------------------------
				3412	*/
				3413	flag floatx80_lt_quiet( floatx80 a, floatx80 b )
				3414	{
				3415	flag aSign, bSign;
				3416
				3417	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3418	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3419	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3420	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3421	) {
				3422	if ( floatx80_is_signaling_nan( a )
				3423	\|\| floatx80_is_signaling_nan( b ) ) {
				3424	float_raise( float_flag_invalid );
				3425	}
				3426	return 0;
				3427	}
				3428	aSign = extractFloatx80Sign( a );
				3429	bSign = extractFloatx80Sign( b );
				3430	if ( aSign != bSign ) {
				3431	return
				3432	aSign
				3433	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3434	!= 0 );
				3435	}
				3436	return
				3437	aSign ? lt128( b.high, b.low, a.high, a.low )
				3438	: lt128( a.high, a.low, b.high, b.low );
				3439
				3440	}
				3441
				3442	#endif
				3443