blob: 8b75a6e7cb3accd3023da61a3b0650da15c9e632 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2===============================================================================
3
4This C source file is part of the SoftFloat IEC/IEEE Floating-point
5Arithmetic Package, Release 2.
6
7Written by John R. Hauser. This work was made possible in part by the
8International Computer Science Institute, located at Suite 600, 1947 Center
9Street, Berkeley, California 94704. Funding was partially provided by the
10National Science Foundation under grant MIP-9311980. The original version
11of this code was written as part of a project to build a fixed-point vector
12processor in collaboration with the University of California at Berkeley,
13overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
15arithmetic/softfloat.html'.
16
17THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
18has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
19TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
20PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
21AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
22
23Derivative works are acceptable, even for commercial purposes, so long as
24(1) they include prominent notice that the work is derivative, and (2) they
25include prominent notice akin to these three paragraphs for those parts of
26this code that are retained.
27
28===============================================================================
29*/
30
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +010031#include <asm/div64.h>
32
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include "fpa11.h"
34//#include "milieu.h"
35//#include "softfloat.h"
36
37/*
38-------------------------------------------------------------------------------
Linus Torvalds1da177e2005-04-16 15:20:36 -070039Primitive arithmetic functions, including multi-word arithmetic, and
40division and square root approximations. (Can be specialized to target if
41desired.)
42-------------------------------------------------------------------------------
43*/
44#include "softfloat-macros"
45
46/*
47-------------------------------------------------------------------------------
48Functions and definitions to determine: (1) whether tininess for underflow
49is detected before or after rounding by default, (2) what (if anything)
50happens when exceptions are raised, (3) how signaling NaNs are distinguished
51from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52are propagated from function inputs to output. These details are target-
53specific.
54-------------------------------------------------------------------------------
55*/
56#include "softfloat-specialize"
57
58/*
59-------------------------------------------------------------------------------
60Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
61and 7, and returns the properly rounded 32-bit integer corresponding to the
62input. If `zSign' is nonzero, the input is negated before being converted
63to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
64input is simply rounded to an integer, with the inexact exception raised if
65the input cannot be represented exactly as an integer. If the fixed-point
66input is too large, however, the invalid exception is raised and the largest
67positive or negative integer is returned.
68-------------------------------------------------------------------------------
69*/
Richard Purdief148af22005-08-03 19:49:17 +010070static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
Linus Torvalds1da177e2005-04-16 15:20:36 -070071{
72 int8 roundingMode;
73 flag roundNearestEven;
74 int8 roundIncrement, roundBits;
75 int32 z;
76
Richard Purdief148af22005-08-03 19:49:17 +010077 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078 roundNearestEven = ( roundingMode == float_round_nearest_even );
79 roundIncrement = 0x40;
80 if ( ! roundNearestEven ) {
81 if ( roundingMode == float_round_to_zero ) {
82 roundIncrement = 0;
83 }
84 else {
85 roundIncrement = 0x7F;
86 if ( zSign ) {
87 if ( roundingMode == float_round_up ) roundIncrement = 0;
88 }
89 else {
90 if ( roundingMode == float_round_down ) roundIncrement = 0;
91 }
92 }
93 }
94 roundBits = absZ & 0x7F;
95 absZ = ( absZ + roundIncrement )>>7;
96 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
97 z = absZ;
98 if ( zSign ) z = - z;
99 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
Richard Purdief148af22005-08-03 19:49:17 +0100100 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 return zSign ? 0x80000000 : 0x7FFFFFFF;
102 }
Richard Purdief148af22005-08-03 19:49:17 +0100103 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 return z;
105
106}
107
108/*
109-------------------------------------------------------------------------------
110Returns the fraction bits of the single-precision floating-point value `a'.
111-------------------------------------------------------------------------------
112*/
113INLINE bits32 extractFloat32Frac( float32 a )
114{
115
116 return a & 0x007FFFFF;
117
118}
119
120/*
121-------------------------------------------------------------------------------
122Returns the exponent bits of the single-precision floating-point value `a'.
123-------------------------------------------------------------------------------
124*/
125INLINE int16 extractFloat32Exp( float32 a )
126{
127
128 return ( a>>23 ) & 0xFF;
129
130}
131
132/*
133-------------------------------------------------------------------------------
134Returns the sign bit of the single-precision floating-point value `a'.
135-------------------------------------------------------------------------------
136*/
137#if 0 /* in softfloat.h */
138INLINE flag extractFloat32Sign( float32 a )
139{
140
141 return a>>31;
142
143}
144#endif
145
146/*
147-------------------------------------------------------------------------------
148Normalizes the subnormal single-precision floating-point value represented
149by the denormalized significand `aSig'. The normalized exponent and
150significand are stored at the locations pointed to by `zExpPtr' and
151`zSigPtr', respectively.
152-------------------------------------------------------------------------------
153*/
154static void
155 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
156{
157 int8 shiftCount;
158
159 shiftCount = countLeadingZeros32( aSig ) - 8;
160 *zSigPtr = aSig<<shiftCount;
161 *zExpPtr = 1 - shiftCount;
162
163}
164
165/*
166-------------------------------------------------------------------------------
167Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
168single-precision floating-point value, returning the result. After being
169shifted into the proper positions, the three fields are simply added
170together to form the result. This means that any integer portion of `zSig'
171will be added into the exponent. Since a properly normalized significand
172will have an integer portion equal to 1, the `zExp' input should be 1 less
173than the desired result exponent whenever `zSig' is a complete, normalized
174significand.
175-------------------------------------------------------------------------------
176*/
177INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
178{
179#if 0
180 float32 f;
181 __asm__("@ packFloat32 \n\
182 mov %0, %1, asl #31 \n\
183 orr %0, %2, asl #23 \n\
184 orr %0, %3"
185 : /* no outputs */
186 : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
187 : "cc");
188 return f;
189#else
190 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
191#endif
192}
193
194/*
195-------------------------------------------------------------------------------
196Takes an abstract floating-point value having sign `zSign', exponent `zExp',
197and significand `zSig', and returns the proper single-precision floating-
198point value corresponding to the abstract input. Ordinarily, the abstract
199value is simply rounded and packed into the single-precision format, with
200the inexact exception raised if the abstract input cannot be represented
201exactly. If the abstract value is too large, however, the overflow and
202inexact exceptions are raised and an infinity or maximal finite value is
203returned. If the abstract value is too small, the input value is rounded to
204a subnormal number, and the underflow and inexact exceptions are raised if
205the abstract input cannot be represented exactly as a subnormal single-
206precision floating-point number.
207 The input significand `zSig' has its binary point between bits 30
208and 29, which is 7 bits to the left of the usual location. This shifted
209significand must be normalized or smaller. If `zSig' is not normalized,
210`zExp' must be 0; in that case, the result returned is a subnormal number,
211and it must not require rounding. In the usual case that `zSig' is
212normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
213The handling of underflow and overflow follows the IEC/IEEE Standard for
214Binary Floating-point Arithmetic.
215-------------------------------------------------------------------------------
216*/
Richard Purdief148af22005-08-03 19:49:17 +0100217static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218{
219 int8 roundingMode;
220 flag roundNearestEven;
221 int8 roundIncrement, roundBits;
222 flag isTiny;
223
Richard Purdief148af22005-08-03 19:49:17 +0100224 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 roundNearestEven = ( roundingMode == float_round_nearest_even );
226 roundIncrement = 0x40;
227 if ( ! roundNearestEven ) {
228 if ( roundingMode == float_round_to_zero ) {
229 roundIncrement = 0;
230 }
231 else {
232 roundIncrement = 0x7F;
233 if ( zSign ) {
234 if ( roundingMode == float_round_up ) roundIncrement = 0;
235 }
236 else {
237 if ( roundingMode == float_round_down ) roundIncrement = 0;
238 }
239 }
240 }
241 roundBits = zSig & 0x7F;
242 if ( 0xFD <= (bits16) zExp ) {
243 if ( ( 0xFD < zExp )
244 || ( ( zExp == 0xFD )
245 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
246 ) {
Richard Purdief148af22005-08-03 19:49:17 +0100247 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
249 }
250 if ( zExp < 0 ) {
251 isTiny =
252 ( float_detect_tininess == float_tininess_before_rounding )
253 || ( zExp < -1 )
254 || ( zSig + roundIncrement < 0x80000000 );
255 shift32RightJamming( zSig, - zExp, &zSig );
256 zExp = 0;
257 roundBits = zSig & 0x7F;
Richard Purdief148af22005-08-03 19:49:17 +0100258 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 }
260 }
Richard Purdief148af22005-08-03 19:49:17 +0100261 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 zSig = ( zSig + roundIncrement )>>7;
263 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
264 if ( zSig == 0 ) zExp = 0;
265 return packFloat32( zSign, zExp, zSig );
266
267}
268
269/*
270-------------------------------------------------------------------------------
271Takes an abstract floating-point value having sign `zSign', exponent `zExp',
272and significand `zSig', and returns the proper single-precision floating-
273point value corresponding to the abstract input. This routine is just like
274`roundAndPackFloat32' except that `zSig' does not have to be normalized in
275any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
276point exponent.
277-------------------------------------------------------------------------------
278*/
279static float32
Richard Purdief148af22005-08-03 19:49:17 +0100280 normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281{
282 int8 shiftCount;
283
284 shiftCount = countLeadingZeros32( zSig ) - 1;
Richard Purdief148af22005-08-03 19:49:17 +0100285 return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
287}
288
289/*
290-------------------------------------------------------------------------------
291Returns the fraction bits of the double-precision floating-point value `a'.
292-------------------------------------------------------------------------------
293*/
294INLINE bits64 extractFloat64Frac( float64 a )
295{
296
297 return a & LIT64( 0x000FFFFFFFFFFFFF );
298
299}
300
301/*
302-------------------------------------------------------------------------------
303Returns the exponent bits of the double-precision floating-point value `a'.
304-------------------------------------------------------------------------------
305*/
306INLINE int16 extractFloat64Exp( float64 a )
307{
308
309 return ( a>>52 ) & 0x7FF;
310
311}
312
313/*
314-------------------------------------------------------------------------------
315Returns the sign bit of the double-precision floating-point value `a'.
316-------------------------------------------------------------------------------
317*/
318#if 0 /* in softfloat.h */
319INLINE flag extractFloat64Sign( float64 a )
320{
321
322 return a>>63;
323
324}
325#endif
326
327/*
328-------------------------------------------------------------------------------
329Normalizes the subnormal double-precision floating-point value represented
330by the denormalized significand `aSig'. The normalized exponent and
331significand are stored at the locations pointed to by `zExpPtr' and
332`zSigPtr', respectively.
333-------------------------------------------------------------------------------
334*/
335static void
336 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
337{
338 int8 shiftCount;
339
340 shiftCount = countLeadingZeros64( aSig ) - 11;
341 *zSigPtr = aSig<<shiftCount;
342 *zExpPtr = 1 - shiftCount;
343
344}
345
346/*
347-------------------------------------------------------------------------------
348Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
349double-precision floating-point value, returning the result. After being
350shifted into the proper positions, the three fields are simply added
351together to form the result. This means that any integer portion of `zSig'
352will be added into the exponent. Since a properly normalized significand
353will have an integer portion equal to 1, the `zExp' input should be 1 less
354than the desired result exponent whenever `zSig' is a complete, normalized
355significand.
356-------------------------------------------------------------------------------
357*/
358INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
359{
360
361 return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
362
363}
364
365/*
366-------------------------------------------------------------------------------
367Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368and significand `zSig', and returns the proper double-precision floating-
369point value corresponding to the abstract input. Ordinarily, the abstract
370value is simply rounded and packed into the double-precision format, with
371the inexact exception raised if the abstract input cannot be represented
372exactly. If the abstract value is too large, however, the overflow and
373inexact exceptions are raised and an infinity or maximal finite value is
374returned. If the abstract value is too small, the input value is rounded to
375a subnormal number, and the underflow and inexact exceptions are raised if
376the abstract input cannot be represented exactly as a subnormal double-
377precision floating-point number.
378 The input significand `zSig' has its binary point between bits 62
379and 61, which is 10 bits to the left of the usual location. This shifted
380significand must be normalized or smaller. If `zSig' is not normalized,
381`zExp' must be 0; in that case, the result returned is a subnormal number,
382and it must not require rounding. In the usual case that `zSig' is
383normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
384The handling of underflow and overflow follows the IEC/IEEE Standard for
385Binary Floating-point Arithmetic.
386-------------------------------------------------------------------------------
387*/
Richard Purdief148af22005-08-03 19:49:17 +0100388static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
390 int8 roundingMode;
391 flag roundNearestEven;
392 int16 roundIncrement, roundBits;
393 flag isTiny;
394
Richard Purdief148af22005-08-03 19:49:17 +0100395 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 roundNearestEven = ( roundingMode == float_round_nearest_even );
397 roundIncrement = 0x200;
398 if ( ! roundNearestEven ) {
399 if ( roundingMode == float_round_to_zero ) {
400 roundIncrement = 0;
401 }
402 else {
403 roundIncrement = 0x3FF;
404 if ( zSign ) {
405 if ( roundingMode == float_round_up ) roundIncrement = 0;
406 }
407 else {
408 if ( roundingMode == float_round_down ) roundIncrement = 0;
409 }
410 }
411 }
412 roundBits = zSig & 0x3FF;
413 if ( 0x7FD <= (bits16) zExp ) {
414 if ( ( 0x7FD < zExp )
415 || ( ( zExp == 0x7FD )
416 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
417 ) {
418 //register int lr = __builtin_return_address(0);
419 //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
Richard Purdief148af22005-08-03 19:49:17 +0100420 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
422 }
423 if ( zExp < 0 ) {
424 isTiny =
425 ( float_detect_tininess == float_tininess_before_rounding )
426 || ( zExp < -1 )
427 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
428 shift64RightJamming( zSig, - zExp, &zSig );
429 zExp = 0;
430 roundBits = zSig & 0x3FF;
Richard Purdief148af22005-08-03 19:49:17 +0100431 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 }
433 }
Richard Purdief148af22005-08-03 19:49:17 +0100434 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 zSig = ( zSig + roundIncrement )>>10;
436 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
437 if ( zSig == 0 ) zExp = 0;
438 return packFloat64( zSign, zExp, zSig );
439
440}
441
442/*
443-------------------------------------------------------------------------------
444Takes an abstract floating-point value having sign `zSign', exponent `zExp',
445and significand `zSig', and returns the proper double-precision floating-
446point value corresponding to the abstract input. This routine is just like
447`roundAndPackFloat64' except that `zSig' does not have to be normalized in
448any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
449point exponent.
450-------------------------------------------------------------------------------
451*/
452static float64
Richard Purdief148af22005-08-03 19:49:17 +0100453 normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454{
455 int8 shiftCount;
456
457 shiftCount = countLeadingZeros64( zSig ) - 1;
Richard Purdief148af22005-08-03 19:49:17 +0100458 return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
460}
461
462#ifdef FLOATX80
463
464/*
465-------------------------------------------------------------------------------
466Returns the fraction bits of the extended double-precision floating-point
467value `a'.
468-------------------------------------------------------------------------------
469*/
470INLINE bits64 extractFloatx80Frac( floatx80 a )
471{
472
473 return a.low;
474
475}
476
477/*
478-------------------------------------------------------------------------------
479Returns the exponent bits of the extended double-precision floating-point
480value `a'.
481-------------------------------------------------------------------------------
482*/
483INLINE int32 extractFloatx80Exp( floatx80 a )
484{
485
486 return a.high & 0x7FFF;
487
488}
489
490/*
491-------------------------------------------------------------------------------
492Returns the sign bit of the extended double-precision floating-point value
493`a'.
494-------------------------------------------------------------------------------
495*/
496INLINE flag extractFloatx80Sign( floatx80 a )
497{
498
499 return a.high>>15;
500
501}
502
503/*
504-------------------------------------------------------------------------------
505Normalizes the subnormal extended double-precision floating-point value
506represented by the denormalized significand `aSig'. The normalized exponent
507and significand are stored at the locations pointed to by `zExpPtr' and
508`zSigPtr', respectively.
509-------------------------------------------------------------------------------
510*/
511static void
512 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
513{
514 int8 shiftCount;
515
516 shiftCount = countLeadingZeros64( aSig );
517 *zSigPtr = aSig<<shiftCount;
518 *zExpPtr = 1 - shiftCount;
519
520}
521
522/*
523-------------------------------------------------------------------------------
524Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
525extended double-precision floating-point value, returning the result.
526-------------------------------------------------------------------------------
527*/
528INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
529{
530 floatx80 z;
531
532 z.low = zSig;
533 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
534 return z;
535
536}
537
538/*
539-------------------------------------------------------------------------------
540Takes an abstract floating-point value having sign `zSign', exponent `zExp',
541and extended significand formed by the concatenation of `zSig0' and `zSig1',
542and returns the proper extended double-precision floating-point value
543corresponding to the abstract input. Ordinarily, the abstract value is
544rounded and packed into the extended double-precision format, with the
545inexact exception raised if the abstract input cannot be represented
546exactly. If the abstract value is too large, however, the overflow and
547inexact exceptions are raised and an infinity or maximal finite value is
548returned. If the abstract value is too small, the input value is rounded to
549a subnormal number, and the underflow and inexact exceptions are raised if
550the abstract input cannot be represented exactly as a subnormal extended
551double-precision floating-point number.
552 If `roundingPrecision' is 32 or 64, the result is rounded to the same
553number of bits as single or double precision, respectively. Otherwise, the
554result is rounded to the full precision of the extended double-precision
555format.
556 The input significand must be normalized or smaller. If the input
557significand is not normalized, `zExp' must be 0; in that case, the result
558returned is a subnormal number, and it must not require rounding. The
559handling of underflow and overflow follows the IEC/IEEE Standard for Binary
560Floating-point Arithmetic.
561-------------------------------------------------------------------------------
562*/
563static floatx80
564 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +0100565 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 )
567{
Richard Purdief148af22005-08-03 19:49:17 +0100568 int8 roundingMode, roundingPrecision;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 flag roundNearestEven, increment, isTiny;
570 int64 roundIncrement, roundMask, roundBits;
571
Richard Purdief148af22005-08-03 19:49:17 +0100572 roundingMode = roundData->mode;
573 roundingPrecision = roundData->precision;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 roundNearestEven = ( roundingMode == float_round_nearest_even );
575 if ( roundingPrecision == 80 ) goto precision80;
576 if ( roundingPrecision == 64 ) {
577 roundIncrement = LIT64( 0x0000000000000400 );
578 roundMask = LIT64( 0x00000000000007FF );
579 }
580 else if ( roundingPrecision == 32 ) {
581 roundIncrement = LIT64( 0x0000008000000000 );
582 roundMask = LIT64( 0x000000FFFFFFFFFF );
583 }
584 else {
585 goto precision80;
586 }
587 zSig0 |= ( zSig1 != 0 );
588 if ( ! roundNearestEven ) {
589 if ( roundingMode == float_round_to_zero ) {
590 roundIncrement = 0;
591 }
592 else {
593 roundIncrement = roundMask;
594 if ( zSign ) {
595 if ( roundingMode == float_round_up ) roundIncrement = 0;
596 }
597 else {
598 if ( roundingMode == float_round_down ) roundIncrement = 0;
599 }
600 }
601 }
602 roundBits = zSig0 & roundMask;
603 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
604 if ( ( 0x7FFE < zExp )
605 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
606 ) {
607 goto overflow;
608 }
609 if ( zExp <= 0 ) {
610 isTiny =
611 ( float_detect_tininess == float_tininess_before_rounding )
612 || ( zExp < 0 )
613 || ( zSig0 <= zSig0 + roundIncrement );
614 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
615 zExp = 0;
616 roundBits = zSig0 & roundMask;
Richard Purdief148af22005-08-03 19:49:17 +0100617 if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
618 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 zSig0 += roundIncrement;
620 if ( (sbits64) zSig0 < 0 ) zExp = 1;
621 roundIncrement = roundMask + 1;
622 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
623 roundMask |= roundIncrement;
624 }
625 zSig0 &= ~ roundMask;
626 return packFloatx80( zSign, zExp, zSig0 );
627 }
628 }
Richard Purdief148af22005-08-03 19:49:17 +0100629 if ( roundBits ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 zSig0 += roundIncrement;
631 if ( zSig0 < roundIncrement ) {
632 ++zExp;
633 zSig0 = LIT64( 0x8000000000000000 );
634 }
635 roundIncrement = roundMask + 1;
636 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
637 roundMask |= roundIncrement;
638 }
639 zSig0 &= ~ roundMask;
640 if ( zSig0 == 0 ) zExp = 0;
641 return packFloatx80( zSign, zExp, zSig0 );
642 precision80:
643 increment = ( (sbits64) zSig1 < 0 );
644 if ( ! roundNearestEven ) {
645 if ( roundingMode == float_round_to_zero ) {
646 increment = 0;
647 }
648 else {
649 if ( zSign ) {
650 increment = ( roundingMode == float_round_down ) && zSig1;
651 }
652 else {
653 increment = ( roundingMode == float_round_up ) && zSig1;
654 }
655 }
656 }
657 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
658 if ( ( 0x7FFE < zExp )
659 || ( ( zExp == 0x7FFE )
660 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
661 && increment
662 )
663 ) {
664 roundMask = 0;
665 overflow:
Richard Purdief148af22005-08-03 19:49:17 +0100666 roundData->exception |= float_flag_overflow | float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 if ( ( roundingMode == float_round_to_zero )
668 || ( zSign && ( roundingMode == float_round_up ) )
669 || ( ! zSign && ( roundingMode == float_round_down ) )
670 ) {
671 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
672 }
673 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
674 }
675 if ( zExp <= 0 ) {
676 isTiny =
677 ( float_detect_tininess == float_tininess_before_rounding )
678 || ( zExp < 0 )
679 || ! increment
680 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
681 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
682 zExp = 0;
Richard Purdief148af22005-08-03 19:49:17 +0100683 if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
684 if ( zSig1 ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 if ( roundNearestEven ) {
686 increment = ( (sbits64) zSig1 < 0 );
687 }
688 else {
689 if ( zSign ) {
690 increment = ( roundingMode == float_round_down ) && zSig1;
691 }
692 else {
693 increment = ( roundingMode == float_round_up ) && zSig1;
694 }
695 }
696 if ( increment ) {
697 ++zSig0;
698 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
699 if ( (sbits64) zSig0 < 0 ) zExp = 1;
700 }
701 return packFloatx80( zSign, zExp, zSig0 );
702 }
703 }
Richard Purdief148af22005-08-03 19:49:17 +0100704 if ( zSig1 ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 if ( increment ) {
706 ++zSig0;
707 if ( zSig0 == 0 ) {
708 ++zExp;
709 zSig0 = LIT64( 0x8000000000000000 );
710 }
711 else {
712 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
713 }
714 }
715 else {
716 if ( zSig0 == 0 ) zExp = 0;
717 }
718
719 return packFloatx80( zSign, zExp, zSig0 );
720}
721
722/*
723-------------------------------------------------------------------------------
724Takes an abstract floating-point value having sign `zSign', exponent
725`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
726and returns the proper extended double-precision floating-point value
727corresponding to the abstract input. This routine is just like
728`roundAndPackFloatx80' except that the input significand does not have to be
729normalized.
730-------------------------------------------------------------------------------
731*/
732static floatx80
733 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +0100734 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 )
736{
737 int8 shiftCount;
738
739 if ( zSig0 == 0 ) {
740 zSig0 = zSig1;
741 zSig1 = 0;
742 zExp -= 64;
743 }
744 shiftCount = countLeadingZeros64( zSig0 );
745 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
746 zExp -= shiftCount;
747 return
Richard Purdief148af22005-08-03 19:49:17 +0100748 roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749
750}
751
752#endif
753
754/*
755-------------------------------------------------------------------------------
756Returns the result of converting the 32-bit two's complement integer `a' to
757the single-precision floating-point format. The conversion is performed
758according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
759-------------------------------------------------------------------------------
760*/
Richard Purdief148af22005-08-03 19:49:17 +0100761float32 int32_to_float32(struct roundingData *roundData, int32 a)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762{
763 flag zSign;
764
765 if ( a == 0 ) return 0;
766 if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
767 zSign = ( a < 0 );
Richard Purdief148af22005-08-03 19:49:17 +0100768 return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
770}
771
772/*
773-------------------------------------------------------------------------------
774Returns the result of converting the 32-bit two's complement integer `a' to
775the double-precision floating-point format. The conversion is performed
776according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
777-------------------------------------------------------------------------------
778*/
779float64 int32_to_float64( int32 a )
780{
781 flag aSign;
782 uint32 absA;
783 int8 shiftCount;
784 bits64 zSig;
785
786 if ( a == 0 ) return 0;
787 aSign = ( a < 0 );
788 absA = aSign ? - a : a;
789 shiftCount = countLeadingZeros32( absA ) + 21;
790 zSig = absA;
791 return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
792
793}
794
795#ifdef FLOATX80
796
797/*
798-------------------------------------------------------------------------------
799Returns the result of converting the 32-bit two's complement integer `a'
800to the extended double-precision floating-point format. The conversion
801is performed according to the IEC/IEEE Standard for Binary Floating-point
802Arithmetic.
803-------------------------------------------------------------------------------
804*/
805floatx80 int32_to_floatx80( int32 a )
806{
807 flag zSign;
808 uint32 absA;
809 int8 shiftCount;
810 bits64 zSig;
811
812 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
813 zSign = ( a < 0 );
814 absA = zSign ? - a : a;
815 shiftCount = countLeadingZeros32( absA ) + 32;
816 zSig = absA;
817 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
818
819}
820
821#endif
822
823/*
824-------------------------------------------------------------------------------
825Returns the result of converting the single-precision floating-point value
826`a' to the 32-bit two's complement integer format. The conversion is
827performed according to the IEC/IEEE Standard for Binary Floating-point
828Arithmetic---which means in particular that the conversion is rounded
829according to the current rounding mode. If `a' is a NaN, the largest
830positive integer is returned. Otherwise, if the conversion overflows, the
831largest integer with the same sign as `a' is returned.
832-------------------------------------------------------------------------------
833*/
Richard Purdief148af22005-08-03 19:49:17 +0100834int32 float32_to_int32( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835{
836 flag aSign;
837 int16 aExp, shiftCount;
838 bits32 aSig;
839 bits64 zSig;
840
841 aSig = extractFloat32Frac( a );
842 aExp = extractFloat32Exp( a );
843 aSign = extractFloat32Sign( a );
844 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
845 if ( aExp ) aSig |= 0x00800000;
846 shiftCount = 0xAF - aExp;
847 zSig = aSig;
848 zSig <<= 32;
849 if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +0100850 return roundAndPackInt32( roundData, aSign, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
852}
853
854/*
855-------------------------------------------------------------------------------
856Returns the result of converting the single-precision floating-point value
857`a' to the 32-bit two's complement integer format. The conversion is
858performed according to the IEC/IEEE Standard for Binary Floating-point
859Arithmetic, except that the conversion is always rounded toward zero. If
860`a' is a NaN, the largest positive integer is returned. Otherwise, if the
861conversion overflows, the largest integer with the same sign as `a' is
862returned.
863-------------------------------------------------------------------------------
864*/
865int32 float32_to_int32_round_to_zero( float32 a )
866{
867 flag aSign;
868 int16 aExp, shiftCount;
869 bits32 aSig;
870 int32 z;
871
872 aSig = extractFloat32Frac( a );
873 aExp = extractFloat32Exp( a );
874 aSign = extractFloat32Sign( a );
875 shiftCount = aExp - 0x9E;
876 if ( 0 <= shiftCount ) {
877 if ( a == 0xCF000000 ) return 0x80000000;
878 float_raise( float_flag_invalid );
879 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
880 return 0x80000000;
881 }
882 else if ( aExp <= 0x7E ) {
Richard Purdief148af22005-08-03 19:49:17 +0100883 if ( aExp | aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 return 0;
885 }
886 aSig = ( aSig | 0x00800000 )<<8;
887 z = aSig>>( - shiftCount );
888 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
Richard Purdief148af22005-08-03 19:49:17 +0100889 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 }
891 return aSign ? - z : z;
892
893}
894
895/*
896-------------------------------------------------------------------------------
897Returns the result of converting the single-precision floating-point value
898`a' to the double-precision floating-point format. The conversion is
899performed according to the IEC/IEEE Standard for Binary Floating-point
900Arithmetic.
901-------------------------------------------------------------------------------
902*/
903float64 float32_to_float64( float32 a )
904{
905 flag aSign;
906 int16 aExp;
907 bits32 aSig;
908
909 aSig = extractFloat32Frac( a );
910 aExp = extractFloat32Exp( a );
911 aSign = extractFloat32Sign( a );
912 if ( aExp == 0xFF ) {
913 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
914 return packFloat64( aSign, 0x7FF, 0 );
915 }
916 if ( aExp == 0 ) {
917 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
918 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
919 --aExp;
920 }
921 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
922
923}
924
925#ifdef FLOATX80
926
927/*
928-------------------------------------------------------------------------------
929Returns the result of converting the single-precision floating-point value
930`a' to the extended double-precision floating-point format. The conversion
931is performed according to the IEC/IEEE Standard for Binary Floating-point
932Arithmetic.
933-------------------------------------------------------------------------------
934*/
935floatx80 float32_to_floatx80( float32 a )
936{
937 flag aSign;
938 int16 aExp;
939 bits32 aSig;
940
941 aSig = extractFloat32Frac( a );
942 aExp = extractFloat32Exp( a );
943 aSign = extractFloat32Sign( a );
944 if ( aExp == 0xFF ) {
945 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
946 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
947 }
948 if ( aExp == 0 ) {
949 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
950 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
951 }
952 aSig |= 0x00800000;
953 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
954
955}
956
957#endif
958
959/*
960-------------------------------------------------------------------------------
961Rounds the single-precision floating-point value `a' to an integer, and
962returns the result as a single-precision floating-point value. The
963operation is performed according to the IEC/IEEE Standard for Binary
964Floating-point Arithmetic.
965-------------------------------------------------------------------------------
966*/
Richard Purdief148af22005-08-03 19:49:17 +0100967float32 float32_round_to_int( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968{
969 flag aSign;
970 int16 aExp;
971 bits32 lastBitMask, roundBitsMask;
972 int8 roundingMode;
973 float32 z;
974
975 aExp = extractFloat32Exp( a );
976 if ( 0x96 <= aExp ) {
977 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
978 return propagateFloat32NaN( a, a );
979 }
980 return a;
981 }
Richard Purdief148af22005-08-03 19:49:17 +0100982 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if ( aExp <= 0x7E ) {
984 if ( (bits32) ( a<<1 ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +0100985 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 aSign = extractFloat32Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +0100987 switch ( roundingMode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 case float_round_nearest_even:
989 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
990 return packFloat32( aSign, 0x7F, 0 );
991 }
992 break;
993 case float_round_down:
994 return aSign ? 0xBF800000 : 0;
995 case float_round_up:
996 return aSign ? 0x80000000 : 0x3F800000;
997 }
998 return packFloat32( aSign, 0, 0 );
999 }
1000 lastBitMask = 1;
1001 lastBitMask <<= 0x96 - aExp;
1002 roundBitsMask = lastBitMask - 1;
1003 z = a;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 if ( roundingMode == float_round_nearest_even ) {
1005 z += lastBitMask>>1;
1006 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1007 }
1008 else if ( roundingMode != float_round_to_zero ) {
1009 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1010 z += roundBitsMask;
1011 }
1012 }
1013 z &= ~ roundBitsMask;
Richard Purdief148af22005-08-03 19:49:17 +01001014 if ( z != a ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 return z;
1016
1017}
1018
1019/*
1020-------------------------------------------------------------------------------
1021Returns the result of adding the absolute values of the single-precision
1022floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1023before being returned. `zSign' is ignored if the result is a NaN. The
1024addition is performed according to the IEC/IEEE Standard for Binary
1025Floating-point Arithmetic.
1026-------------------------------------------------------------------------------
1027*/
Richard Purdief148af22005-08-03 19:49:17 +01001028static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029{
1030 int16 aExp, bExp, zExp;
1031 bits32 aSig, bSig, zSig;
1032 int16 expDiff;
1033
1034 aSig = extractFloat32Frac( a );
1035 aExp = extractFloat32Exp( a );
1036 bSig = extractFloat32Frac( b );
1037 bExp = extractFloat32Exp( b );
1038 expDiff = aExp - bExp;
1039 aSig <<= 6;
1040 bSig <<= 6;
1041 if ( 0 < expDiff ) {
1042 if ( aExp == 0xFF ) {
1043 if ( aSig ) return propagateFloat32NaN( a, b );
1044 return a;
1045 }
1046 if ( bExp == 0 ) {
1047 --expDiff;
1048 }
1049 else {
1050 bSig |= 0x20000000;
1051 }
1052 shift32RightJamming( bSig, expDiff, &bSig );
1053 zExp = aExp;
1054 }
1055 else if ( expDiff < 0 ) {
1056 if ( bExp == 0xFF ) {
1057 if ( bSig ) return propagateFloat32NaN( a, b );
1058 return packFloat32( zSign, 0xFF, 0 );
1059 }
1060 if ( aExp == 0 ) {
1061 ++expDiff;
1062 }
1063 else {
1064 aSig |= 0x20000000;
1065 }
1066 shift32RightJamming( aSig, - expDiff, &aSig );
1067 zExp = bExp;
1068 }
1069 else {
1070 if ( aExp == 0xFF ) {
1071 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1072 return a;
1073 }
1074 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1075 zSig = 0x40000000 + aSig + bSig;
1076 zExp = aExp;
1077 goto roundAndPack;
1078 }
1079 aSig |= 0x20000000;
1080 zSig = ( aSig + bSig )<<1;
1081 --zExp;
1082 if ( (sbits32) zSig < 0 ) {
1083 zSig = aSig + bSig;
1084 ++zExp;
1085 }
1086 roundAndPack:
Richard Purdief148af22005-08-03 19:49:17 +01001087 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
1089}
1090
1091/*
1092-------------------------------------------------------------------------------
1093Returns the result of subtracting the absolute values of the single-
1094precision floating-point values `a' and `b'. If `zSign' is true, the
1095difference is negated before being returned. `zSign' is ignored if the
1096result is a NaN. The subtraction is performed according to the IEC/IEEE
1097Standard for Binary Floating-point Arithmetic.
1098-------------------------------------------------------------------------------
1099*/
Richard Purdief148af22005-08-03 19:49:17 +01001100static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101{
1102 int16 aExp, bExp, zExp;
1103 bits32 aSig, bSig, zSig;
1104 int16 expDiff;
1105
1106 aSig = extractFloat32Frac( a );
1107 aExp = extractFloat32Exp( a );
1108 bSig = extractFloat32Frac( b );
1109 bExp = extractFloat32Exp( b );
1110 expDiff = aExp - bExp;
1111 aSig <<= 7;
1112 bSig <<= 7;
1113 if ( 0 < expDiff ) goto aExpBigger;
1114 if ( expDiff < 0 ) goto bExpBigger;
1115 if ( aExp == 0xFF ) {
1116 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01001117 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 return float32_default_nan;
1119 }
1120 if ( aExp == 0 ) {
1121 aExp = 1;
1122 bExp = 1;
1123 }
1124 if ( bSig < aSig ) goto aBigger;
1125 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01001126 return packFloat32( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 bExpBigger:
1128 if ( bExp == 0xFF ) {
1129 if ( bSig ) return propagateFloat32NaN( a, b );
1130 return packFloat32( zSign ^ 1, 0xFF, 0 );
1131 }
1132 if ( aExp == 0 ) {
1133 ++expDiff;
1134 }
1135 else {
1136 aSig |= 0x40000000;
1137 }
1138 shift32RightJamming( aSig, - expDiff, &aSig );
1139 bSig |= 0x40000000;
1140 bBigger:
1141 zSig = bSig - aSig;
1142 zExp = bExp;
1143 zSign ^= 1;
1144 goto normalizeRoundAndPack;
1145 aExpBigger:
1146 if ( aExp == 0xFF ) {
1147 if ( aSig ) return propagateFloat32NaN( a, b );
1148 return a;
1149 }
1150 if ( bExp == 0 ) {
1151 --expDiff;
1152 }
1153 else {
1154 bSig |= 0x40000000;
1155 }
1156 shift32RightJamming( bSig, expDiff, &bSig );
1157 aSig |= 0x40000000;
1158 aBigger:
1159 zSig = aSig - bSig;
1160 zExp = aExp;
1161 normalizeRoundAndPack:
1162 --zExp;
Richard Purdief148af22005-08-03 19:49:17 +01001163 return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165}
1166
1167/*
1168-------------------------------------------------------------------------------
1169Returns the result of adding the single-precision floating-point values `a'
1170and `b'. The operation is performed according to the IEC/IEEE Standard for
1171Binary Floating-point Arithmetic.
1172-------------------------------------------------------------------------------
1173*/
Richard Purdief148af22005-08-03 19:49:17 +01001174float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175{
1176 flag aSign, bSign;
1177
1178 aSign = extractFloat32Sign( a );
1179 bSign = extractFloat32Sign( b );
1180 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01001181 return addFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 }
1183 else {
Richard Purdief148af22005-08-03 19:49:17 +01001184 return subFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 }
1186
1187}
1188
1189/*
1190-------------------------------------------------------------------------------
1191Returns the result of subtracting the single-precision floating-point values
1192`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1193for Binary Floating-point Arithmetic.
1194-------------------------------------------------------------------------------
1195*/
Richard Purdief148af22005-08-03 19:49:17 +01001196float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197{
1198 flag aSign, bSign;
1199
1200 aSign = extractFloat32Sign( a );
1201 bSign = extractFloat32Sign( b );
1202 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01001203 return subFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
1205 else {
Richard Purdief148af22005-08-03 19:49:17 +01001206 return addFloat32Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 }
1208
1209}
1210
1211/*
1212-------------------------------------------------------------------------------
1213Returns the result of multiplying the single-precision floating-point values
1214`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1215for Binary Floating-point Arithmetic.
1216-------------------------------------------------------------------------------
1217*/
Richard Purdief148af22005-08-03 19:49:17 +01001218float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219{
1220 flag aSign, bSign, zSign;
1221 int16 aExp, bExp, zExp;
1222 bits32 aSig, bSig;
1223 bits64 zSig64;
1224 bits32 zSig;
1225
1226 aSig = extractFloat32Frac( a );
1227 aExp = extractFloat32Exp( a );
1228 aSign = extractFloat32Sign( a );
1229 bSig = extractFloat32Frac( b );
1230 bExp = extractFloat32Exp( b );
1231 bSign = extractFloat32Sign( b );
1232 zSign = aSign ^ bSign;
1233 if ( aExp == 0xFF ) {
1234 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1235 return propagateFloat32NaN( a, b );
1236 }
1237 if ( ( bExp | bSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001238 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 return float32_default_nan;
1240 }
1241 return packFloat32( zSign, 0xFF, 0 );
1242 }
1243 if ( bExp == 0xFF ) {
1244 if ( bSig ) return propagateFloat32NaN( a, b );
1245 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001246 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return float32_default_nan;
1248 }
1249 return packFloat32( zSign, 0xFF, 0 );
1250 }
1251 if ( aExp == 0 ) {
1252 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1253 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1254 }
1255 if ( bExp == 0 ) {
1256 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1257 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1258 }
1259 zExp = aExp + bExp - 0x7F;
1260 aSig = ( aSig | 0x00800000 )<<7;
1261 bSig = ( bSig | 0x00800000 )<<8;
1262 shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1263 zSig = zSig64;
1264 if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1265 zSig <<= 1;
1266 --zExp;
1267 }
Richard Purdief148af22005-08-03 19:49:17 +01001268 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
1270}
1271
1272/*
1273-------------------------------------------------------------------------------
1274Returns the result of dividing the single-precision floating-point value `a'
1275by the corresponding value `b'. The operation is performed according to the
1276IEC/IEEE Standard for Binary Floating-point Arithmetic.
1277-------------------------------------------------------------------------------
1278*/
Richard Purdief148af22005-08-03 19:49:17 +01001279float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280{
1281 flag aSign, bSign, zSign;
1282 int16 aExp, bExp, zExp;
1283 bits32 aSig, bSig, zSig;
1284
1285 aSig = extractFloat32Frac( a );
1286 aExp = extractFloat32Exp( a );
1287 aSign = extractFloat32Sign( a );
1288 bSig = extractFloat32Frac( b );
1289 bExp = extractFloat32Exp( b );
1290 bSign = extractFloat32Sign( b );
1291 zSign = aSign ^ bSign;
1292 if ( aExp == 0xFF ) {
1293 if ( aSig ) return propagateFloat32NaN( a, b );
1294 if ( bExp == 0xFF ) {
1295 if ( bSig ) return propagateFloat32NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01001296 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 return float32_default_nan;
1298 }
1299 return packFloat32( zSign, 0xFF, 0 );
1300 }
1301 if ( bExp == 0xFF ) {
1302 if ( bSig ) return propagateFloat32NaN( a, b );
1303 return packFloat32( zSign, 0, 0 );
1304 }
1305 if ( bExp == 0 ) {
1306 if ( bSig == 0 ) {
1307 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001308 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 return float32_default_nan;
1310 }
Richard Purdief148af22005-08-03 19:49:17 +01001311 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 return packFloat32( zSign, 0xFF, 0 );
1313 }
1314 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1315 }
1316 if ( aExp == 0 ) {
1317 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1318 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1319 }
1320 zExp = aExp - bExp + 0x7D;
1321 aSig = ( aSig | 0x00800000 )<<7;
1322 bSig = ( bSig | 0x00800000 )<<8;
1323 if ( bSig <= ( aSig + aSig ) ) {
1324 aSig >>= 1;
1325 ++zExp;
1326 }
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +01001327 {
1328 bits64 tmp = ( (bits64) aSig )<<32;
1329 do_div( tmp, bSig );
1330 zSig = tmp;
1331 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 if ( ( zSig & 0x3F ) == 0 ) {
1333 zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1334 }
Richard Purdief148af22005-08-03 19:49:17 +01001335 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337}
1338
1339/*
1340-------------------------------------------------------------------------------
1341Returns the remainder of the single-precision floating-point value `a'
1342with respect to the corresponding value `b'. The operation is performed
1343according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1344-------------------------------------------------------------------------------
1345*/
Richard Purdief148af22005-08-03 19:49:17 +01001346float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347{
1348 flag aSign, bSign, zSign;
1349 int16 aExp, bExp, expDiff;
1350 bits32 aSig, bSig;
1351 bits32 q;
1352 bits64 aSig64, bSig64, q64;
1353 bits32 alternateASig;
1354 sbits32 sigMean;
1355
1356 aSig = extractFloat32Frac( a );
1357 aExp = extractFloat32Exp( a );
1358 aSign = extractFloat32Sign( a );
1359 bSig = extractFloat32Frac( b );
1360 bExp = extractFloat32Exp( b );
1361 bSign = extractFloat32Sign( b );
1362 if ( aExp == 0xFF ) {
1363 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1364 return propagateFloat32NaN( a, b );
1365 }
Richard Purdief148af22005-08-03 19:49:17 +01001366 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 return float32_default_nan;
1368 }
1369 if ( bExp == 0xFF ) {
1370 if ( bSig ) return propagateFloat32NaN( a, b );
1371 return a;
1372 }
1373 if ( bExp == 0 ) {
1374 if ( bSig == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01001375 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 return float32_default_nan;
1377 }
1378 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1379 }
1380 if ( aExp == 0 ) {
1381 if ( aSig == 0 ) return a;
1382 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1383 }
1384 expDiff = aExp - bExp;
1385 aSig |= 0x00800000;
1386 bSig |= 0x00800000;
1387 if ( expDiff < 32 ) {
1388 aSig <<= 8;
1389 bSig <<= 8;
1390 if ( expDiff < 0 ) {
1391 if ( expDiff < -1 ) return a;
1392 aSig >>= 1;
1393 }
1394 q = ( bSig <= aSig );
1395 if ( q ) aSig -= bSig;
1396 if ( 0 < expDiff ) {
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +01001397 bits64 tmp = ( (bits64) aSig )<<32;
1398 do_div( tmp, bSig );
1399 q = tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 q >>= 32 - expDiff;
1401 bSig >>= 2;
1402 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1403 }
1404 else {
1405 aSig >>= 2;
1406 bSig >>= 2;
1407 }
1408 }
1409 else {
1410 if ( bSig <= aSig ) aSig -= bSig;
1411 aSig64 = ( (bits64) aSig )<<40;
1412 bSig64 = ( (bits64) bSig )<<40;
1413 expDiff -= 64;
1414 while ( 0 < expDiff ) {
1415 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1416 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1417 aSig64 = - ( ( bSig * q64 )<<38 );
1418 expDiff -= 62;
1419 }
1420 expDiff += 64;
1421 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1422 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1423 q = q64>>( 64 - expDiff );
1424 bSig <<= 6;
1425 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1426 }
1427 do {
1428 alternateASig = aSig;
1429 ++q;
1430 aSig -= bSig;
1431 } while ( 0 <= (sbits32) aSig );
1432 sigMean = aSig + alternateASig;
1433 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1434 aSig = alternateASig;
1435 }
1436 zSign = ( (sbits32) aSig < 0 );
1437 if ( zSign ) aSig = - aSig;
Richard Purdief148af22005-08-03 19:49:17 +01001438 return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
1440}
1441
1442/*
1443-------------------------------------------------------------------------------
1444Returns the square root of the single-precision floating-point value `a'.
1445The operation is performed according to the IEC/IEEE Standard for Binary
1446Floating-point Arithmetic.
1447-------------------------------------------------------------------------------
1448*/
Richard Purdief148af22005-08-03 19:49:17 +01001449float32 float32_sqrt( struct roundingData *roundData, float32 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450{
1451 flag aSign;
1452 int16 aExp, zExp;
1453 bits32 aSig, zSig;
1454 bits64 rem, term;
1455
1456 aSig = extractFloat32Frac( a );
1457 aExp = extractFloat32Exp( a );
1458 aSign = extractFloat32Sign( a );
1459 if ( aExp == 0xFF ) {
1460 if ( aSig ) return propagateFloat32NaN( a, 0 );
1461 if ( ! aSign ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001462 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 return float32_default_nan;
1464 }
1465 if ( aSign ) {
1466 if ( ( aExp | aSig ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001467 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468 return float32_default_nan;
1469 }
1470 if ( aExp == 0 ) {
1471 if ( aSig == 0 ) return 0;
1472 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1473 }
1474 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1475 aSig = ( aSig | 0x00800000 )<<8;
1476 zSig = estimateSqrt32( aExp, aSig ) + 2;
1477 if ( ( zSig & 0x7F ) <= 5 ) {
1478 if ( zSig < 2 ) {
1479 zSig = 0xFFFFFFFF;
1480 }
1481 else {
1482 aSig >>= aExp & 1;
1483 term = ( (bits64) zSig ) * zSig;
1484 rem = ( ( (bits64) aSig )<<32 ) - term;
1485 while ( (sbits64) rem < 0 ) {
1486 --zSig;
1487 rem += ( ( (bits64) zSig )<<1 ) | 1;
1488 }
1489 zSig |= ( rem != 0 );
1490 }
1491 }
1492 shift32RightJamming( zSig, 1, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +01001493 return roundAndPackFloat32( roundData, 0, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494
1495}
1496
1497/*
1498-------------------------------------------------------------------------------
1499Returns 1 if the single-precision floating-point value `a' is equal to the
1500corresponding value `b', and 0 otherwise. The comparison is performed
1501according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1502-------------------------------------------------------------------------------
1503*/
1504flag float32_eq( float32 a, float32 b )
1505{
1506
1507 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1508 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1509 ) {
1510 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1511 float_raise( float_flag_invalid );
1512 }
1513 return 0;
1514 }
1515 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1516
1517}
1518
1519/*
1520-------------------------------------------------------------------------------
1521Returns 1 if the single-precision floating-point value `a' is less than or
1522equal to the corresponding value `b', and 0 otherwise. The comparison is
1523performed according to the IEC/IEEE Standard for Binary Floating-point
1524Arithmetic.
1525-------------------------------------------------------------------------------
1526*/
1527flag float32_le( float32 a, float32 b )
1528{
1529 flag aSign, bSign;
1530
1531 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1532 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1533 ) {
1534 float_raise( float_flag_invalid );
1535 return 0;
1536 }
1537 aSign = extractFloat32Sign( a );
1538 bSign = extractFloat32Sign( b );
1539 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1540 return ( a == b ) || ( aSign ^ ( a < b ) );
1541
1542}
1543
1544/*
1545-------------------------------------------------------------------------------
1546Returns 1 if the single-precision floating-point value `a' is less than
1547the corresponding value `b', and 0 otherwise. The comparison is performed
1548according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1549-------------------------------------------------------------------------------
1550*/
1551flag float32_lt( float32 a, float32 b )
1552{
1553 flag aSign, bSign;
1554
1555 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1556 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1557 ) {
1558 float_raise( float_flag_invalid );
1559 return 0;
1560 }
1561 aSign = extractFloat32Sign( a );
1562 bSign = extractFloat32Sign( b );
1563 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1564 return ( a != b ) && ( aSign ^ ( a < b ) );
1565
1566}
1567
1568/*
1569-------------------------------------------------------------------------------
1570Returns 1 if the single-precision floating-point value `a' is equal to the
1571corresponding value `b', and 0 otherwise. The invalid exception is raised
1572if either operand is a NaN. Otherwise, the comparison is performed
1573according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1574-------------------------------------------------------------------------------
1575*/
1576flag float32_eq_signaling( float32 a, float32 b )
1577{
1578
1579 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1580 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1581 ) {
1582 float_raise( float_flag_invalid );
1583 return 0;
1584 }
1585 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1586
1587}
1588
1589/*
1590-------------------------------------------------------------------------------
1591Returns 1 if the single-precision floating-point value `a' is less than or
1592equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1593cause an exception. Otherwise, the comparison is performed according to the
1594IEC/IEEE Standard for Binary Floating-point Arithmetic.
1595-------------------------------------------------------------------------------
1596*/
1597flag float32_le_quiet( float32 a, float32 b )
1598{
1599 flag aSign, bSign;
1600 //int16 aExp, bExp;
1601
1602 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1603 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1604 ) {
1605 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1606 float_raise( float_flag_invalid );
1607 }
1608 return 0;
1609 }
1610 aSign = extractFloat32Sign( a );
1611 bSign = extractFloat32Sign( b );
1612 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1613 return ( a == b ) || ( aSign ^ ( a < b ) );
1614
1615}
1616
1617/*
1618-------------------------------------------------------------------------------
1619Returns 1 if the single-precision floating-point value `a' is less than
1620the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1621exception. Otherwise, the comparison is performed according to the IEC/IEEE
1622Standard for Binary Floating-point Arithmetic.
1623-------------------------------------------------------------------------------
1624*/
1625flag float32_lt_quiet( float32 a, float32 b )
1626{
1627 flag aSign, bSign;
1628
1629 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1630 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1631 ) {
1632 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1633 float_raise( float_flag_invalid );
1634 }
1635 return 0;
1636 }
1637 aSign = extractFloat32Sign( a );
1638 bSign = extractFloat32Sign( b );
1639 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1640 return ( a != b ) && ( aSign ^ ( a < b ) );
1641
1642}
1643
1644/*
1645-------------------------------------------------------------------------------
1646Returns the result of converting the double-precision floating-point value
1647`a' to the 32-bit two's complement integer format. The conversion is
1648performed according to the IEC/IEEE Standard for Binary Floating-point
1649Arithmetic---which means in particular that the conversion is rounded
1650according to the current rounding mode. If `a' is a NaN, the largest
1651positive integer is returned. Otherwise, if the conversion overflows, the
1652largest integer with the same sign as `a' is returned.
1653-------------------------------------------------------------------------------
1654*/
Richard Purdief148af22005-08-03 19:49:17 +01001655int32 float64_to_int32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656{
1657 flag aSign;
1658 int16 aExp, shiftCount;
1659 bits64 aSig;
1660
1661 aSig = extractFloat64Frac( a );
1662 aExp = extractFloat64Exp( a );
1663 aSign = extractFloat64Sign( a );
1664 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1665 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1666 shiftCount = 0x42C - aExp;
1667 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01001668 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669
1670}
1671
1672/*
1673-------------------------------------------------------------------------------
1674Returns the result of converting the double-precision floating-point value
1675`a' to the 32-bit two's complement integer format. The conversion is
1676performed according to the IEC/IEEE Standard for Binary Floating-point
1677Arithmetic, except that the conversion is always rounded toward zero. If
1678`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1679conversion overflows, the largest integer with the same sign as `a' is
1680returned.
1681-------------------------------------------------------------------------------
1682*/
1683int32 float64_to_int32_round_to_zero( float64 a )
1684{
1685 flag aSign;
1686 int16 aExp, shiftCount;
1687 bits64 aSig, savedASig;
1688 int32 z;
1689
1690 aSig = extractFloat64Frac( a );
1691 aExp = extractFloat64Exp( a );
1692 aSign = extractFloat64Sign( a );
1693 shiftCount = 0x433 - aExp;
1694 if ( shiftCount < 21 ) {
1695 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1696 goto invalid;
1697 }
1698 else if ( 52 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01001699 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 return 0;
1701 }
1702 aSig |= LIT64( 0x0010000000000000 );
1703 savedASig = aSig;
1704 aSig >>= shiftCount;
1705 z = aSig;
1706 if ( aSign ) z = - z;
1707 if ( ( z < 0 ) ^ aSign ) {
1708 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01001709 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 return aSign ? 0x80000000 : 0x7FFFFFFF;
1711 }
1712 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01001713 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 }
1715 return z;
1716
1717}
1718
1719/*
1720-------------------------------------------------------------------------------
1721Returns the result of converting the double-precision floating-point value
1722`a' to the 32-bit two's complement unsigned integer format. The conversion
1723is performed according to the IEC/IEEE Standard for Binary Floating-point
1724Arithmetic---which means in particular that the conversion is rounded
1725according to the current rounding mode. If `a' is a NaN, the largest
1726positive integer is returned. Otherwise, if the conversion overflows, the
1727largest positive integer is returned.
1728-------------------------------------------------------------------------------
1729*/
Richard Purdief148af22005-08-03 19:49:17 +01001730int32 float64_to_uint32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731{
1732 flag aSign;
1733 int16 aExp, shiftCount;
1734 bits64 aSig;
1735
1736 aSig = extractFloat64Frac( a );
1737 aExp = extractFloat64Exp( a );
1738 aSign = 0; //extractFloat64Sign( a );
1739 //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1740 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1741 shiftCount = 0x42C - aExp;
1742 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01001743 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744}
1745
1746/*
1747-------------------------------------------------------------------------------
1748Returns the result of converting the double-precision floating-point value
1749`a' to the 32-bit two's complement integer format. The conversion is
1750performed according to the IEC/IEEE Standard for Binary Floating-point
1751Arithmetic, except that the conversion is always rounded toward zero. If
1752`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1753conversion overflows, the largest positive integer is returned.
1754-------------------------------------------------------------------------------
1755*/
1756int32 float64_to_uint32_round_to_zero( float64 a )
1757{
1758 flag aSign;
1759 int16 aExp, shiftCount;
1760 bits64 aSig, savedASig;
1761 int32 z;
1762
1763 aSig = extractFloat64Frac( a );
1764 aExp = extractFloat64Exp( a );
1765 aSign = extractFloat64Sign( a );
1766 shiftCount = 0x433 - aExp;
1767 if ( shiftCount < 21 ) {
1768 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1769 goto invalid;
1770 }
1771 else if ( 52 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01001772 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 return 0;
1774 }
1775 aSig |= LIT64( 0x0010000000000000 );
1776 savedASig = aSig;
1777 aSig >>= shiftCount;
1778 z = aSig;
1779 if ( aSign ) z = - z;
1780 if ( ( z < 0 ) ^ aSign ) {
1781 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01001782 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 return aSign ? 0x80000000 : 0x7FFFFFFF;
1784 }
1785 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01001786 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 }
1788 return z;
1789}
1790
1791/*
1792-------------------------------------------------------------------------------
1793Returns the result of converting the double-precision floating-point value
1794`a' to the single-precision floating-point format. The conversion is
1795performed according to the IEC/IEEE Standard for Binary Floating-point
1796Arithmetic.
1797-------------------------------------------------------------------------------
1798*/
Richard Purdief148af22005-08-03 19:49:17 +01001799float32 float64_to_float32( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800{
1801 flag aSign;
1802 int16 aExp;
1803 bits64 aSig;
1804 bits32 zSig;
1805
1806 aSig = extractFloat64Frac( a );
1807 aExp = extractFloat64Exp( a );
1808 aSign = extractFloat64Sign( a );
1809 if ( aExp == 0x7FF ) {
1810 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
1811 return packFloat32( aSign, 0xFF, 0 );
1812 }
1813 shift64RightJamming( aSig, 22, &aSig );
1814 zSig = aSig;
1815 if ( aExp || zSig ) {
1816 zSig |= 0x40000000;
1817 aExp -= 0x381;
1818 }
Richard Purdief148af22005-08-03 19:49:17 +01001819 return roundAndPackFloat32( roundData, aSign, aExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
1821}
1822
1823#ifdef FLOATX80
1824
1825/*
1826-------------------------------------------------------------------------------
1827Returns the result of converting the double-precision floating-point value
1828`a' to the extended double-precision floating-point format. The conversion
1829is performed according to the IEC/IEEE Standard for Binary Floating-point
1830Arithmetic.
1831-------------------------------------------------------------------------------
1832*/
1833floatx80 float64_to_floatx80( float64 a )
1834{
1835 flag aSign;
1836 int16 aExp;
1837 bits64 aSig;
1838
1839 aSig = extractFloat64Frac( a );
1840 aExp = extractFloat64Exp( a );
1841 aSign = extractFloat64Sign( a );
1842 if ( aExp == 0x7FF ) {
1843 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1844 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1845 }
1846 if ( aExp == 0 ) {
1847 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1848 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1849 }
1850 return
1851 packFloatx80(
1852 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1853
1854}
1855
1856#endif
1857
1858/*
1859-------------------------------------------------------------------------------
1860Rounds the double-precision floating-point value `a' to an integer, and
1861returns the result as a double-precision floating-point value. The
1862operation is performed according to the IEC/IEEE Standard for Binary
1863Floating-point Arithmetic.
1864-------------------------------------------------------------------------------
1865*/
Richard Purdief148af22005-08-03 19:49:17 +01001866float64 float64_round_to_int( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867{
1868 flag aSign;
1869 int16 aExp;
1870 bits64 lastBitMask, roundBitsMask;
1871 int8 roundingMode;
1872 float64 z;
1873
1874 aExp = extractFloat64Exp( a );
1875 if ( 0x433 <= aExp ) {
1876 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
1877 return propagateFloat64NaN( a, a );
1878 }
1879 return a;
1880 }
1881 if ( aExp <= 0x3FE ) {
1882 if ( (bits64) ( a<<1 ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01001883 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 aSign = extractFloat64Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +01001885 switch ( roundData->mode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 case float_round_nearest_even:
1887 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
1888 return packFloat64( aSign, 0x3FF, 0 );
1889 }
1890 break;
1891 case float_round_down:
1892 return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
1893 case float_round_up:
1894 return
1895 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
1896 }
1897 return packFloat64( aSign, 0, 0 );
1898 }
1899 lastBitMask = 1;
1900 lastBitMask <<= 0x433 - aExp;
1901 roundBitsMask = lastBitMask - 1;
1902 z = a;
Richard Purdief148af22005-08-03 19:49:17 +01001903 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 if ( roundingMode == float_round_nearest_even ) {
1905 z += lastBitMask>>1;
1906 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1907 }
1908 else if ( roundingMode != float_round_to_zero ) {
1909 if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1910 z += roundBitsMask;
1911 }
1912 }
1913 z &= ~ roundBitsMask;
Richard Purdief148af22005-08-03 19:49:17 +01001914 if ( z != a ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915 return z;
1916
1917}
1918
1919/*
1920-------------------------------------------------------------------------------
1921Returns the result of adding the absolute values of the double-precision
1922floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1923before being returned. `zSign' is ignored if the result is a NaN. The
1924addition is performed according to the IEC/IEEE Standard for Binary
1925Floating-point Arithmetic.
1926-------------------------------------------------------------------------------
1927*/
Richard Purdief148af22005-08-03 19:49:17 +01001928static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929{
1930 int16 aExp, bExp, zExp;
1931 bits64 aSig, bSig, zSig;
1932 int16 expDiff;
1933
1934 aSig = extractFloat64Frac( a );
1935 aExp = extractFloat64Exp( a );
1936 bSig = extractFloat64Frac( b );
1937 bExp = extractFloat64Exp( b );
1938 expDiff = aExp - bExp;
1939 aSig <<= 9;
1940 bSig <<= 9;
1941 if ( 0 < expDiff ) {
1942 if ( aExp == 0x7FF ) {
1943 if ( aSig ) return propagateFloat64NaN( a, b );
1944 return a;
1945 }
1946 if ( bExp == 0 ) {
1947 --expDiff;
1948 }
1949 else {
1950 bSig |= LIT64( 0x2000000000000000 );
1951 }
1952 shift64RightJamming( bSig, expDiff, &bSig );
1953 zExp = aExp;
1954 }
1955 else if ( expDiff < 0 ) {
1956 if ( bExp == 0x7FF ) {
1957 if ( bSig ) return propagateFloat64NaN( a, b );
1958 return packFloat64( zSign, 0x7FF, 0 );
1959 }
1960 if ( aExp == 0 ) {
1961 ++expDiff;
1962 }
1963 else {
1964 aSig |= LIT64( 0x2000000000000000 );
1965 }
1966 shift64RightJamming( aSig, - expDiff, &aSig );
1967 zExp = bExp;
1968 }
1969 else {
1970 if ( aExp == 0x7FF ) {
1971 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
1972 return a;
1973 }
1974 if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
1975 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
1976 zExp = aExp;
1977 goto roundAndPack;
1978 }
1979 aSig |= LIT64( 0x2000000000000000 );
1980 zSig = ( aSig + bSig )<<1;
1981 --zExp;
1982 if ( (sbits64) zSig < 0 ) {
1983 zSig = aSig + bSig;
1984 ++zExp;
1985 }
1986 roundAndPack:
Richard Purdief148af22005-08-03 19:49:17 +01001987 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988
1989}
1990
1991/*
1992-------------------------------------------------------------------------------
1993Returns the result of subtracting the absolute values of the double-
1994precision floating-point values `a' and `b'. If `zSign' is true, the
1995difference is negated before being returned. `zSign' is ignored if the
1996result is a NaN. The subtraction is performed according to the IEC/IEEE
1997Standard for Binary Floating-point Arithmetic.
1998-------------------------------------------------------------------------------
1999*/
Richard Purdief148af22005-08-03 19:49:17 +01002000static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001{
2002 int16 aExp, bExp, zExp;
2003 bits64 aSig, bSig, zSig;
2004 int16 expDiff;
2005
2006 aSig = extractFloat64Frac( a );
2007 aExp = extractFloat64Exp( a );
2008 bSig = extractFloat64Frac( b );
2009 bExp = extractFloat64Exp( b );
2010 expDiff = aExp - bExp;
2011 aSig <<= 10;
2012 bSig <<= 10;
2013 if ( 0 < expDiff ) goto aExpBigger;
2014 if ( expDiff < 0 ) goto bExpBigger;
2015 if ( aExp == 0x7FF ) {
2016 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01002017 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 return float64_default_nan;
2019 }
2020 if ( aExp == 0 ) {
2021 aExp = 1;
2022 bExp = 1;
2023 }
2024 if ( bSig < aSig ) goto aBigger;
2025 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01002026 return packFloat64( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 bExpBigger:
2028 if ( bExp == 0x7FF ) {
2029 if ( bSig ) return propagateFloat64NaN( a, b );
2030 return packFloat64( zSign ^ 1, 0x7FF, 0 );
2031 }
2032 if ( aExp == 0 ) {
2033 ++expDiff;
2034 }
2035 else {
2036 aSig |= LIT64( 0x4000000000000000 );
2037 }
2038 shift64RightJamming( aSig, - expDiff, &aSig );
2039 bSig |= LIT64( 0x4000000000000000 );
2040 bBigger:
2041 zSig = bSig - aSig;
2042 zExp = bExp;
2043 zSign ^= 1;
2044 goto normalizeRoundAndPack;
2045 aExpBigger:
2046 if ( aExp == 0x7FF ) {
2047 if ( aSig ) return propagateFloat64NaN( a, b );
2048 return a;
2049 }
2050 if ( bExp == 0 ) {
2051 --expDiff;
2052 }
2053 else {
2054 bSig |= LIT64( 0x4000000000000000 );
2055 }
2056 shift64RightJamming( bSig, expDiff, &bSig );
2057 aSig |= LIT64( 0x4000000000000000 );
2058 aBigger:
2059 zSig = aSig - bSig;
2060 zExp = aExp;
2061 normalizeRoundAndPack:
2062 --zExp;
Richard Purdief148af22005-08-03 19:49:17 +01002063 return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064
2065}
2066
2067/*
2068-------------------------------------------------------------------------------
2069Returns the result of adding the double-precision floating-point values `a'
2070and `b'. The operation is performed according to the IEC/IEEE Standard for
2071Binary Floating-point Arithmetic.
2072-------------------------------------------------------------------------------
2073*/
Richard Purdief148af22005-08-03 19:49:17 +01002074float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075{
2076 flag aSign, bSign;
2077
2078 aSign = extractFloat64Sign( a );
2079 bSign = extractFloat64Sign( b );
2080 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002081 return addFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 }
2083 else {
Richard Purdief148af22005-08-03 19:49:17 +01002084 return subFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 }
2086
2087}
2088
2089/*
2090-------------------------------------------------------------------------------
2091Returns the result of subtracting the double-precision floating-point values
2092`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2093for Binary Floating-point Arithmetic.
2094-------------------------------------------------------------------------------
2095*/
Richard Purdief148af22005-08-03 19:49:17 +01002096float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097{
2098 flag aSign, bSign;
2099
2100 aSign = extractFloat64Sign( a );
2101 bSign = extractFloat64Sign( b );
2102 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002103 return subFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 }
2105 else {
Richard Purdief148af22005-08-03 19:49:17 +01002106 return addFloat64Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 }
2108
2109}
2110
2111/*
2112-------------------------------------------------------------------------------
2113Returns the result of multiplying the double-precision floating-point values
2114`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2115for Binary Floating-point Arithmetic.
2116-------------------------------------------------------------------------------
2117*/
Richard Purdief148af22005-08-03 19:49:17 +01002118float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119{
2120 flag aSign, bSign, zSign;
2121 int16 aExp, bExp, zExp;
2122 bits64 aSig, bSig, zSig0, zSig1;
2123
2124 aSig = extractFloat64Frac( a );
2125 aExp = extractFloat64Exp( a );
2126 aSign = extractFloat64Sign( a );
2127 bSig = extractFloat64Frac( b );
2128 bExp = extractFloat64Exp( b );
2129 bSign = extractFloat64Sign( b );
2130 zSign = aSign ^ bSign;
2131 if ( aExp == 0x7FF ) {
2132 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2133 return propagateFloat64NaN( a, b );
2134 }
2135 if ( ( bExp | bSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002136 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137 return float64_default_nan;
2138 }
2139 return packFloat64( zSign, 0x7FF, 0 );
2140 }
2141 if ( bExp == 0x7FF ) {
2142 if ( bSig ) return propagateFloat64NaN( a, b );
2143 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002144 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 return float64_default_nan;
2146 }
2147 return packFloat64( zSign, 0x7FF, 0 );
2148 }
2149 if ( aExp == 0 ) {
2150 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2151 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2152 }
2153 if ( bExp == 0 ) {
2154 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2155 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2156 }
2157 zExp = aExp + bExp - 0x3FF;
2158 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2159 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2160 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2161 zSig0 |= ( zSig1 != 0 );
2162 if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2163 zSig0 <<= 1;
2164 --zExp;
2165 }
Richard Purdief148af22005-08-03 19:49:17 +01002166 return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167
2168}
2169
2170/*
2171-------------------------------------------------------------------------------
2172Returns the result of dividing the double-precision floating-point value `a'
2173by the corresponding value `b'. The operation is performed according to
2174the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2175-------------------------------------------------------------------------------
2176*/
Richard Purdief148af22005-08-03 19:49:17 +01002177float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178{
2179 flag aSign, bSign, zSign;
2180 int16 aExp, bExp, zExp;
2181 bits64 aSig, bSig, zSig;
2182 bits64 rem0, rem1;
2183 bits64 term0, term1;
2184
2185 aSig = extractFloat64Frac( a );
2186 aExp = extractFloat64Exp( a );
2187 aSign = extractFloat64Sign( a );
2188 bSig = extractFloat64Frac( b );
2189 bExp = extractFloat64Exp( b );
2190 bSign = extractFloat64Sign( b );
2191 zSign = aSign ^ bSign;
2192 if ( aExp == 0x7FF ) {
2193 if ( aSig ) return propagateFloat64NaN( a, b );
2194 if ( bExp == 0x7FF ) {
2195 if ( bSig ) return propagateFloat64NaN( a, b );
Richard Purdief148af22005-08-03 19:49:17 +01002196 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 return float64_default_nan;
2198 }
2199 return packFloat64( zSign, 0x7FF, 0 );
2200 }
2201 if ( bExp == 0x7FF ) {
2202 if ( bSig ) return propagateFloat64NaN( a, b );
2203 return packFloat64( zSign, 0, 0 );
2204 }
2205 if ( bExp == 0 ) {
2206 if ( bSig == 0 ) {
2207 if ( ( aExp | aSig ) == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002208 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 return float64_default_nan;
2210 }
Richard Purdief148af22005-08-03 19:49:17 +01002211 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 return packFloat64( zSign, 0x7FF, 0 );
2213 }
2214 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2215 }
2216 if ( aExp == 0 ) {
2217 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2218 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2219 }
2220 zExp = aExp - bExp + 0x3FD;
2221 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2222 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2223 if ( bSig <= ( aSig + aSig ) ) {
2224 aSig >>= 1;
2225 ++zExp;
2226 }
2227 zSig = estimateDiv128To64( aSig, 0, bSig );
2228 if ( ( zSig & 0x1FF ) <= 2 ) {
2229 mul64To128( bSig, zSig, &term0, &term1 );
2230 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2231 while ( (sbits64) rem0 < 0 ) {
2232 --zSig;
2233 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2234 }
2235 zSig |= ( rem1 != 0 );
2236 }
Richard Purdief148af22005-08-03 19:49:17 +01002237 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238
2239}
2240
2241/*
2242-------------------------------------------------------------------------------
2243Returns the remainder of the double-precision floating-point value `a'
2244with respect to the corresponding value `b'. The operation is performed
2245according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2246-------------------------------------------------------------------------------
2247*/
Richard Purdief148af22005-08-03 19:49:17 +01002248float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249{
2250 flag aSign, bSign, zSign;
2251 int16 aExp, bExp, expDiff;
2252 bits64 aSig, bSig;
2253 bits64 q, alternateASig;
2254 sbits64 sigMean;
2255
2256 aSig = extractFloat64Frac( a );
2257 aExp = extractFloat64Exp( a );
2258 aSign = extractFloat64Sign( a );
2259 bSig = extractFloat64Frac( b );
2260 bExp = extractFloat64Exp( b );
2261 bSign = extractFloat64Sign( b );
2262 if ( aExp == 0x7FF ) {
2263 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2264 return propagateFloat64NaN( a, b );
2265 }
Richard Purdief148af22005-08-03 19:49:17 +01002266 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 return float64_default_nan;
2268 }
2269 if ( bExp == 0x7FF ) {
2270 if ( bSig ) return propagateFloat64NaN( a, b );
2271 return a;
2272 }
2273 if ( bExp == 0 ) {
2274 if ( bSig == 0 ) {
Richard Purdief148af22005-08-03 19:49:17 +01002275 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 return float64_default_nan;
2277 }
2278 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2279 }
2280 if ( aExp == 0 ) {
2281 if ( aSig == 0 ) return a;
2282 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2283 }
2284 expDiff = aExp - bExp;
2285 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2286 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2287 if ( expDiff < 0 ) {
2288 if ( expDiff < -1 ) return a;
2289 aSig >>= 1;
2290 }
2291 q = ( bSig <= aSig );
2292 if ( q ) aSig -= bSig;
2293 expDiff -= 64;
2294 while ( 0 < expDiff ) {
2295 q = estimateDiv128To64( aSig, 0, bSig );
2296 q = ( 2 < q ) ? q - 2 : 0;
2297 aSig = - ( ( bSig>>2 ) * q );
2298 expDiff -= 62;
2299 }
2300 expDiff += 64;
2301 if ( 0 < expDiff ) {
2302 q = estimateDiv128To64( aSig, 0, bSig );
2303 q = ( 2 < q ) ? q - 2 : 0;
2304 q >>= 64 - expDiff;
2305 bSig >>= 2;
2306 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2307 }
2308 else {
2309 aSig >>= 2;
2310 bSig >>= 2;
2311 }
2312 do {
2313 alternateASig = aSig;
2314 ++q;
2315 aSig -= bSig;
2316 } while ( 0 <= (sbits64) aSig );
2317 sigMean = aSig + alternateASig;
2318 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2319 aSig = alternateASig;
2320 }
2321 zSign = ( (sbits64) aSig < 0 );
2322 if ( zSign ) aSig = - aSig;
Richard Purdief148af22005-08-03 19:49:17 +01002323 return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324
2325}
2326
2327/*
2328-------------------------------------------------------------------------------
2329Returns the square root of the double-precision floating-point value `a'.
2330The operation is performed according to the IEC/IEEE Standard for Binary
2331Floating-point Arithmetic.
2332-------------------------------------------------------------------------------
2333*/
Richard Purdief148af22005-08-03 19:49:17 +01002334float64 float64_sqrt( struct roundingData *roundData, float64 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335{
2336 flag aSign;
2337 int16 aExp, zExp;
2338 bits64 aSig, zSig;
2339 bits64 rem0, rem1, term0, term1; //, shiftedRem;
2340 //float64 z;
2341
2342 aSig = extractFloat64Frac( a );
2343 aExp = extractFloat64Exp( a );
2344 aSign = extractFloat64Sign( a );
2345 if ( aExp == 0x7FF ) {
2346 if ( aSig ) return propagateFloat64NaN( a, a );
2347 if ( ! aSign ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01002348 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349 return float64_default_nan;
2350 }
2351 if ( aSign ) {
2352 if ( ( aExp | aSig ) == 0 ) return a;
Richard Purdief148af22005-08-03 19:49:17 +01002353 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354 return float64_default_nan;
2355 }
2356 if ( aExp == 0 ) {
2357 if ( aSig == 0 ) return 0;
2358 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2359 }
2360 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2361 aSig |= LIT64( 0x0010000000000000 );
2362 zSig = estimateSqrt32( aExp, aSig>>21 );
2363 zSig <<= 31;
2364 aSig <<= 9 - ( aExp & 1 );
2365 zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
2366 if ( ( zSig & 0x3FF ) <= 5 ) {
2367 if ( zSig < 2 ) {
2368 zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
2369 }
2370 else {
2371 aSig <<= 2;
2372 mul64To128( zSig, zSig, &term0, &term1 );
2373 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2374 while ( (sbits64) rem0 < 0 ) {
2375 --zSig;
2376 shortShift128Left( 0, zSig, 1, &term0, &term1 );
2377 term1 |= 1;
2378 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
2379 }
2380 zSig |= ( ( rem0 | rem1 ) != 0 );
2381 }
2382 }
2383 shift64RightJamming( zSig, 1, &zSig );
Richard Purdief148af22005-08-03 19:49:17 +01002384 return roundAndPackFloat64( roundData, 0, zExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385
2386}
2387
2388/*
2389-------------------------------------------------------------------------------
2390Returns 1 if the double-precision floating-point value `a' is equal to the
2391corresponding value `b', and 0 otherwise. The comparison is performed
2392according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2393-------------------------------------------------------------------------------
2394*/
2395flag float64_eq( float64 a, float64 b )
2396{
2397
2398 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2399 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2400 ) {
2401 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2402 float_raise( float_flag_invalid );
2403 }
2404 return 0;
2405 }
2406 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2407
2408}
2409
2410/*
2411-------------------------------------------------------------------------------
2412Returns 1 if the double-precision floating-point value `a' is less than or
2413equal to the corresponding value `b', and 0 otherwise. The comparison is
2414performed according to the IEC/IEEE Standard for Binary Floating-point
2415Arithmetic.
2416-------------------------------------------------------------------------------
2417*/
2418flag float64_le( float64 a, float64 b )
2419{
2420 flag aSign, bSign;
2421
2422 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2423 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2424 ) {
2425 float_raise( float_flag_invalid );
2426 return 0;
2427 }
2428 aSign = extractFloat64Sign( a );
2429 bSign = extractFloat64Sign( b );
2430 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2431 return ( a == b ) || ( aSign ^ ( a < b ) );
2432
2433}
2434
2435/*
2436-------------------------------------------------------------------------------
2437Returns 1 if the double-precision floating-point value `a' is less than
2438the corresponding value `b', and 0 otherwise. The comparison is performed
2439according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2440-------------------------------------------------------------------------------
2441*/
2442flag float64_lt( float64 a, float64 b )
2443{
2444 flag aSign, bSign;
2445
2446 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2447 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2448 ) {
2449 float_raise( float_flag_invalid );
2450 return 0;
2451 }
2452 aSign = extractFloat64Sign( a );
2453 bSign = extractFloat64Sign( b );
2454 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2455 return ( a != b ) && ( aSign ^ ( a < b ) );
2456
2457}
2458
2459/*
2460-------------------------------------------------------------------------------
2461Returns 1 if the double-precision floating-point value `a' is equal to the
2462corresponding value `b', and 0 otherwise. The invalid exception is raised
2463if either operand is a NaN. Otherwise, the comparison is performed
2464according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2465-------------------------------------------------------------------------------
2466*/
2467flag float64_eq_signaling( float64 a, float64 b )
2468{
2469
2470 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2471 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2472 ) {
2473 float_raise( float_flag_invalid );
2474 return 0;
2475 }
2476 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2477
2478}
2479
2480/*
2481-------------------------------------------------------------------------------
2482Returns 1 if the double-precision floating-point value `a' is less than or
2483equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2484cause an exception. Otherwise, the comparison is performed according to the
2485IEC/IEEE Standard for Binary Floating-point Arithmetic.
2486-------------------------------------------------------------------------------
2487*/
2488flag float64_le_quiet( float64 a, float64 b )
2489{
2490 flag aSign, bSign;
2491 //int16 aExp, bExp;
2492
2493 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2494 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2495 ) {
2496 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2497 float_raise( float_flag_invalid );
2498 }
2499 return 0;
2500 }
2501 aSign = extractFloat64Sign( a );
2502 bSign = extractFloat64Sign( b );
2503 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2504 return ( a == b ) || ( aSign ^ ( a < b ) );
2505
2506}
2507
2508/*
2509-------------------------------------------------------------------------------
2510Returns 1 if the double-precision floating-point value `a' is less than
2511the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2512exception. Otherwise, the comparison is performed according to the IEC/IEEE
2513Standard for Binary Floating-point Arithmetic.
2514-------------------------------------------------------------------------------
2515*/
2516flag float64_lt_quiet( float64 a, float64 b )
2517{
2518 flag aSign, bSign;
2519
2520 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2521 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2522 ) {
2523 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2524 float_raise( float_flag_invalid );
2525 }
2526 return 0;
2527 }
2528 aSign = extractFloat64Sign( a );
2529 bSign = extractFloat64Sign( b );
2530 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2531 return ( a != b ) && ( aSign ^ ( a < b ) );
2532
2533}
2534
2535#ifdef FLOATX80
2536
2537/*
2538-------------------------------------------------------------------------------
2539Returns the result of converting the extended double-precision floating-
2540point value `a' to the 32-bit two's complement integer format. The
2541conversion is performed according to the IEC/IEEE Standard for Binary
2542Floating-point Arithmetic---which means in particular that the conversion
2543is rounded according to the current rounding mode. If `a' is a NaN, the
2544largest positive integer is returned. Otherwise, if the conversion
2545overflows, the largest integer with the same sign as `a' is returned.
2546-------------------------------------------------------------------------------
2547*/
Richard Purdief148af22005-08-03 19:49:17 +01002548int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549{
2550 flag aSign;
2551 int32 aExp, shiftCount;
2552 bits64 aSig;
2553
2554 aSig = extractFloatx80Frac( a );
2555 aExp = extractFloatx80Exp( a );
2556 aSign = extractFloatx80Sign( a );
2557 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2558 shiftCount = 0x4037 - aExp;
2559 if ( shiftCount <= 0 ) shiftCount = 1;
2560 shift64RightJamming( aSig, shiftCount, &aSig );
Richard Purdief148af22005-08-03 19:49:17 +01002561 return roundAndPackInt32( roundData, aSign, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562
2563}
2564
2565/*
2566-------------------------------------------------------------------------------
2567Returns the result of converting the extended double-precision floating-
2568point value `a' to the 32-bit two's complement integer format. The
2569conversion is performed according to the IEC/IEEE Standard for Binary
2570Floating-point Arithmetic, except that the conversion is always rounded
2571toward zero. If `a' is a NaN, the largest positive integer is returned.
2572Otherwise, if the conversion overflows, the largest integer with the same
2573sign as `a' is returned.
2574-------------------------------------------------------------------------------
2575*/
2576int32 floatx80_to_int32_round_to_zero( floatx80 a )
2577{
2578 flag aSign;
2579 int32 aExp, shiftCount;
2580 bits64 aSig, savedASig;
2581 int32 z;
2582
2583 aSig = extractFloatx80Frac( a );
2584 aExp = extractFloatx80Exp( a );
2585 aSign = extractFloatx80Sign( a );
2586 shiftCount = 0x403E - aExp;
2587 if ( shiftCount < 32 ) {
2588 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2589 goto invalid;
2590 }
2591 else if ( 63 < shiftCount ) {
Richard Purdief148af22005-08-03 19:49:17 +01002592 if ( aExp || aSig ) float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 return 0;
2594 }
2595 savedASig = aSig;
2596 aSig >>= shiftCount;
2597 z = aSig;
2598 if ( aSign ) z = - z;
2599 if ( ( z < 0 ) ^ aSign ) {
2600 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01002601 float_raise( float_flag_invalid );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 return aSign ? 0x80000000 : 0x7FFFFFFF;
2603 }
2604 if ( ( aSig<<shiftCount ) != savedASig ) {
Richard Purdief148af22005-08-03 19:49:17 +01002605 float_raise( float_flag_inexact );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606 }
2607 return z;
2608
2609}
2610
2611/*
2612-------------------------------------------------------------------------------
2613Returns the result of converting the extended double-precision floating-
2614point value `a' to the single-precision floating-point format. The
2615conversion is performed according to the IEC/IEEE Standard for Binary
2616Floating-point Arithmetic.
2617-------------------------------------------------------------------------------
2618*/
Richard Purdief148af22005-08-03 19:49:17 +01002619float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620{
2621 flag aSign;
2622 int32 aExp;
2623 bits64 aSig;
2624
2625 aSig = extractFloatx80Frac( a );
2626 aExp = extractFloatx80Exp( a );
2627 aSign = extractFloatx80Sign( a );
2628 if ( aExp == 0x7FFF ) {
2629 if ( (bits64) ( aSig<<1 ) ) {
2630 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2631 }
2632 return packFloat32( aSign, 0xFF, 0 );
2633 }
2634 shift64RightJamming( aSig, 33, &aSig );
2635 if ( aExp || aSig ) aExp -= 0x3F81;
Richard Purdief148af22005-08-03 19:49:17 +01002636 return roundAndPackFloat32( roundData, aSign, aExp, aSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637
2638}
2639
2640/*
2641-------------------------------------------------------------------------------
2642Returns the result of converting the extended double-precision floating-
2643point value `a' to the double-precision floating-point format. The
2644conversion is performed according to the IEC/IEEE Standard for Binary
2645Floating-point Arithmetic.
2646-------------------------------------------------------------------------------
2647*/
Richard Purdief148af22005-08-03 19:49:17 +01002648float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649{
2650 flag aSign;
2651 int32 aExp;
2652 bits64 aSig, zSig;
2653
2654 aSig = extractFloatx80Frac( a );
2655 aExp = extractFloatx80Exp( a );
2656 aSign = extractFloatx80Sign( a );
2657 if ( aExp == 0x7FFF ) {
2658 if ( (bits64) ( aSig<<1 ) ) {
2659 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2660 }
2661 return packFloat64( aSign, 0x7FF, 0 );
2662 }
2663 shift64RightJamming( aSig, 1, &zSig );
2664 if ( aExp || aSig ) aExp -= 0x3C01;
Richard Purdief148af22005-08-03 19:49:17 +01002665 return roundAndPackFloat64( roundData, aSign, aExp, zSig );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002666
2667}
2668
2669/*
2670-------------------------------------------------------------------------------
2671Rounds the extended double-precision floating-point value `a' to an integer,
2672and returns the result as an extended quadruple-precision floating-point
2673value. The operation is performed according to the IEC/IEEE Standard for
2674Binary Floating-point Arithmetic.
2675-------------------------------------------------------------------------------
2676*/
Richard Purdief148af22005-08-03 19:49:17 +01002677floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002678{
2679 flag aSign;
2680 int32 aExp;
2681 bits64 lastBitMask, roundBitsMask;
2682 int8 roundingMode;
2683 floatx80 z;
2684
2685 aExp = extractFloatx80Exp( a );
2686 if ( 0x403E <= aExp ) {
2687 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
2688 return propagateFloatx80NaN( a, a );
2689 }
2690 return a;
2691 }
2692 if ( aExp <= 0x3FFE ) {
2693 if ( ( aExp == 0 )
2694 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2695 return a;
2696 }
Richard Purdief148af22005-08-03 19:49:17 +01002697 roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698 aSign = extractFloatx80Sign( a );
Richard Purdief148af22005-08-03 19:49:17 +01002699 switch ( roundData->mode ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002700 case float_round_nearest_even:
2701 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
2702 ) {
2703 return
2704 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2705 }
2706 break;
2707 case float_round_down:
2708 return
2709 aSign ?
2710 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2711 : packFloatx80( 0, 0, 0 );
2712 case float_round_up:
2713 return
2714 aSign ? packFloatx80( 1, 0, 0 )
2715 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2716 }
2717 return packFloatx80( aSign, 0, 0 );
2718 }
2719 lastBitMask = 1;
2720 lastBitMask <<= 0x403E - aExp;
2721 roundBitsMask = lastBitMask - 1;
2722 z = a;
Richard Purdief148af22005-08-03 19:49:17 +01002723 roundingMode = roundData->mode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 if ( roundingMode == float_round_nearest_even ) {
2725 z.low += lastBitMask>>1;
2726 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
2727 }
2728 else if ( roundingMode != float_round_to_zero ) {
2729 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2730 z.low += roundBitsMask;
2731 }
2732 }
2733 z.low &= ~ roundBitsMask;
2734 if ( z.low == 0 ) {
2735 ++z.high;
2736 z.low = LIT64( 0x8000000000000000 );
2737 }
Richard Purdief148af22005-08-03 19:49:17 +01002738 if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739 return z;
2740
2741}
2742
2743/*
2744-------------------------------------------------------------------------------
2745Returns the result of adding the absolute values of the extended double-
2746precision floating-point values `a' and `b'. If `zSign' is true, the sum is
2747negated before being returned. `zSign' is ignored if the result is a NaN.
2748The addition is performed according to the IEC/IEEE Standard for Binary
2749Floating-point Arithmetic.
2750-------------------------------------------------------------------------------
2751*/
Richard Purdief148af22005-08-03 19:49:17 +01002752static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753{
2754 int32 aExp, bExp, zExp;
2755 bits64 aSig, bSig, zSig0, zSig1;
2756 int32 expDiff;
2757
2758 aSig = extractFloatx80Frac( a );
2759 aExp = extractFloatx80Exp( a );
2760 bSig = extractFloatx80Frac( b );
2761 bExp = extractFloatx80Exp( b );
2762 expDiff = aExp - bExp;
2763 if ( 0 < expDiff ) {
2764 if ( aExp == 0x7FFF ) {
2765 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2766 return a;
2767 }
2768 if ( bExp == 0 ) --expDiff;
2769 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2770 zExp = aExp;
2771 }
2772 else if ( expDiff < 0 ) {
2773 if ( bExp == 0x7FFF ) {
2774 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2775 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2776 }
2777 if ( aExp == 0 ) ++expDiff;
2778 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2779 zExp = bExp;
2780 }
2781 else {
2782 if ( aExp == 0x7FFF ) {
2783 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2784 return propagateFloatx80NaN( a, b );
2785 }
2786 return a;
2787 }
2788 zSig1 = 0;
2789 zSig0 = aSig + bSig;
2790 if ( aExp == 0 ) {
2791 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2792 goto roundAndPack;
2793 }
2794 zExp = aExp;
2795 goto shiftRight1;
2796 }
2797
2798 zSig0 = aSig + bSig;
2799
2800 if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
2801 shiftRight1:
2802 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2803 zSig0 |= LIT64( 0x8000000000000000 );
2804 ++zExp;
2805 roundAndPack:
2806 return
2807 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002808 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002809
2810}
2811
2812/*
2813-------------------------------------------------------------------------------
2814Returns the result of subtracting the absolute values of the extended
2815double-precision floating-point values `a' and `b'. If `zSign' is true,
2816the difference is negated before being returned. `zSign' is ignored if the
2817result is a NaN. The subtraction is performed according to the IEC/IEEE
2818Standard for Binary Floating-point Arithmetic.
2819-------------------------------------------------------------------------------
2820*/
Richard Purdief148af22005-08-03 19:49:17 +01002821static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002822{
2823 int32 aExp, bExp, zExp;
2824 bits64 aSig, bSig, zSig0, zSig1;
2825 int32 expDiff;
2826 floatx80 z;
2827
2828 aSig = extractFloatx80Frac( a );
2829 aExp = extractFloatx80Exp( a );
2830 bSig = extractFloatx80Frac( b );
2831 bExp = extractFloatx80Exp( b );
2832 expDiff = aExp - bExp;
2833 if ( 0 < expDiff ) goto aExpBigger;
2834 if ( expDiff < 0 ) goto bExpBigger;
2835 if ( aExp == 0x7FFF ) {
2836 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2837 return propagateFloatx80NaN( a, b );
2838 }
Richard Purdief148af22005-08-03 19:49:17 +01002839 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002840 z.low = floatx80_default_nan_low;
2841 z.high = floatx80_default_nan_high;
2842 return z;
2843 }
2844 if ( aExp == 0 ) {
2845 aExp = 1;
2846 bExp = 1;
2847 }
2848 zSig1 = 0;
2849 if ( bSig < aSig ) goto aBigger;
2850 if ( aSig < bSig ) goto bBigger;
Richard Purdief148af22005-08-03 19:49:17 +01002851 return packFloatx80( roundData->mode == float_round_down, 0, 0 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002852 bExpBigger:
2853 if ( bExp == 0x7FFF ) {
2854 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2855 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2856 }
2857 if ( aExp == 0 ) ++expDiff;
2858 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2859 bBigger:
2860 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2861 zExp = bExp;
2862 zSign ^= 1;
2863 goto normalizeRoundAndPack;
2864 aExpBigger:
2865 if ( aExp == 0x7FFF ) {
2866 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2867 return a;
2868 }
2869 if ( bExp == 0 ) --expDiff;
2870 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2871 aBigger:
2872 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2873 zExp = aExp;
2874 normalizeRoundAndPack:
2875 return
2876 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002877 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002878
2879}
2880
2881/*
2882-------------------------------------------------------------------------------
2883Returns the result of adding the extended double-precision floating-point
2884values `a' and `b'. The operation is performed according to the IEC/IEEE
2885Standard for Binary Floating-point Arithmetic.
2886-------------------------------------------------------------------------------
2887*/
Richard Purdief148af22005-08-03 19:49:17 +01002888floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002889{
2890 flag aSign, bSign;
2891
2892 aSign = extractFloatx80Sign( a );
2893 bSign = extractFloatx80Sign( b );
2894 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002895 return addFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002896 }
2897 else {
Richard Purdief148af22005-08-03 19:49:17 +01002898 return subFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002899 }
2900
2901}
2902
2903/*
2904-------------------------------------------------------------------------------
2905Returns the result of subtracting the extended double-precision floating-
2906point values `a' and `b'. The operation is performed according to the
2907IEC/IEEE Standard for Binary Floating-point Arithmetic.
2908-------------------------------------------------------------------------------
2909*/
Richard Purdief148af22005-08-03 19:49:17 +01002910floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002911{
2912 flag aSign, bSign;
2913
2914 aSign = extractFloatx80Sign( a );
2915 bSign = extractFloatx80Sign( b );
2916 if ( aSign == bSign ) {
Richard Purdief148af22005-08-03 19:49:17 +01002917 return subFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002918 }
2919 else {
Richard Purdief148af22005-08-03 19:49:17 +01002920 return addFloatx80Sigs( roundData, a, b, aSign );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002921 }
2922
2923}
2924
2925/*
2926-------------------------------------------------------------------------------
2927Returns the result of multiplying the extended double-precision floating-
2928point values `a' and `b'. The operation is performed according to the
2929IEC/IEEE Standard for Binary Floating-point Arithmetic.
2930-------------------------------------------------------------------------------
2931*/
Richard Purdief148af22005-08-03 19:49:17 +01002932floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002933{
2934 flag aSign, bSign, zSign;
2935 int32 aExp, bExp, zExp;
2936 bits64 aSig, bSig, zSig0, zSig1;
2937 floatx80 z;
2938
2939 aSig = extractFloatx80Frac( a );
2940 aExp = extractFloatx80Exp( a );
2941 aSign = extractFloatx80Sign( a );
2942 bSig = extractFloatx80Frac( b );
2943 bExp = extractFloatx80Exp( b );
2944 bSign = extractFloatx80Sign( b );
2945 zSign = aSign ^ bSign;
2946 if ( aExp == 0x7FFF ) {
2947 if ( (bits64) ( aSig<<1 )
2948 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
2949 return propagateFloatx80NaN( a, b );
2950 }
2951 if ( ( bExp | bSig ) == 0 ) goto invalid;
2952 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2953 }
2954 if ( bExp == 0x7FFF ) {
2955 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2956 if ( ( aExp | aSig ) == 0 ) {
2957 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01002958 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002959 z.low = floatx80_default_nan_low;
2960 z.high = floatx80_default_nan_high;
2961 return z;
2962 }
2963 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2964 }
2965 if ( aExp == 0 ) {
2966 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2967 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2968 }
2969 if ( bExp == 0 ) {
2970 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2971 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2972 }
2973 zExp = aExp + bExp - 0x3FFE;
2974 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2975 if ( 0 < (sbits64) zSig0 ) {
2976 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2977 --zExp;
2978 }
2979 return
2980 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01002981 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002982
2983}
2984
2985/*
2986-------------------------------------------------------------------------------
2987Returns the result of dividing the extended double-precision floating-point
2988value `a' by the corresponding value `b'. The operation is performed
2989according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2990-------------------------------------------------------------------------------
2991*/
Richard Purdief148af22005-08-03 19:49:17 +01002992floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993{
2994 flag aSign, bSign, zSign;
2995 int32 aExp, bExp, zExp;
2996 bits64 aSig, bSig, zSig0, zSig1;
2997 bits64 rem0, rem1, rem2, term0, term1, term2;
2998 floatx80 z;
2999
3000 aSig = extractFloatx80Frac( a );
3001 aExp = extractFloatx80Exp( a );
3002 aSign = extractFloatx80Sign( a );
3003 bSig = extractFloatx80Frac( b );
3004 bExp = extractFloatx80Exp( b );
3005 bSign = extractFloatx80Sign( b );
3006 zSign = aSign ^ bSign;
3007 if ( aExp == 0x7FFF ) {
3008 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3009 if ( bExp == 0x7FFF ) {
3010 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3011 goto invalid;
3012 }
3013 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3014 }
3015 if ( bExp == 0x7FFF ) {
3016 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3017 return packFloatx80( zSign, 0, 0 );
3018 }
3019 if ( bExp == 0 ) {
3020 if ( bSig == 0 ) {
3021 if ( ( aExp | aSig ) == 0 ) {
3022 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003023 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003024 z.low = floatx80_default_nan_low;
3025 z.high = floatx80_default_nan_high;
3026 return z;
3027 }
Richard Purdief148af22005-08-03 19:49:17 +01003028 roundData->exception |= float_flag_divbyzero;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003029 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3030 }
3031 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3032 }
3033 if ( aExp == 0 ) {
3034 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3035 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3036 }
3037 zExp = aExp - bExp + 0x3FFE;
3038 rem1 = 0;
3039 if ( bSig <= aSig ) {
3040 shift128Right( aSig, 0, 1, &aSig, &rem1 );
3041 ++zExp;
3042 }
3043 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3044 mul64To128( bSig, zSig0, &term0, &term1 );
3045 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3046 while ( (sbits64) rem0 < 0 ) {
3047 --zSig0;
3048 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3049 }
3050 zSig1 = estimateDiv128To64( rem1, 0, bSig );
3051 if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3052 mul64To128( bSig, zSig1, &term1, &term2 );
3053 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3054 while ( (sbits64) rem1 < 0 ) {
3055 --zSig1;
3056 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3057 }
3058 zSig1 |= ( ( rem1 | rem2 ) != 0 );
3059 }
3060 return
3061 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003062 roundData, zSign, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003063
3064}
3065
3066/*
3067-------------------------------------------------------------------------------
3068Returns the remainder of the extended double-precision floating-point value
3069`a' with respect to the corresponding value `b'. The operation is performed
3070according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3071-------------------------------------------------------------------------------
3072*/
Richard Purdief148af22005-08-03 19:49:17 +01003073floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
Linus Torvalds1da177e2005-04-16 15:20:36 -07003074{
3075 flag aSign, bSign, zSign;
3076 int32 aExp, bExp, expDiff;
3077 bits64 aSig0, aSig1, bSig;
3078 bits64 q, term0, term1, alternateASig0, alternateASig1;
3079 floatx80 z;
3080
3081 aSig0 = extractFloatx80Frac( a );
3082 aExp = extractFloatx80Exp( a );
3083 aSign = extractFloatx80Sign( a );
3084 bSig = extractFloatx80Frac( b );
3085 bExp = extractFloatx80Exp( b );
3086 bSign = extractFloatx80Sign( b );
3087 if ( aExp == 0x7FFF ) {
3088 if ( (bits64) ( aSig0<<1 )
3089 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3090 return propagateFloatx80NaN( a, b );
3091 }
3092 goto invalid;
3093 }
3094 if ( bExp == 0x7FFF ) {
3095 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3096 return a;
3097 }
3098 if ( bExp == 0 ) {
3099 if ( bSig == 0 ) {
3100 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003101 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003102 z.low = floatx80_default_nan_low;
3103 z.high = floatx80_default_nan_high;
3104 return z;
3105 }
3106 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3107 }
3108 if ( aExp == 0 ) {
3109 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3110 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3111 }
3112 bSig |= LIT64( 0x8000000000000000 );
3113 zSign = aSign;
3114 expDiff = aExp - bExp;
3115 aSig1 = 0;
3116 if ( expDiff < 0 ) {
3117 if ( expDiff < -1 ) return a;
3118 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3119 expDiff = 0;
3120 }
3121 q = ( bSig <= aSig0 );
3122 if ( q ) aSig0 -= bSig;
3123 expDiff -= 64;
3124 while ( 0 < expDiff ) {
3125 q = estimateDiv128To64( aSig0, aSig1, bSig );
3126 q = ( 2 < q ) ? q - 2 : 0;
3127 mul64To128( bSig, q, &term0, &term1 );
3128 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3129 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3130 expDiff -= 62;
3131 }
3132 expDiff += 64;
3133 if ( 0 < expDiff ) {
3134 q = estimateDiv128To64( aSig0, aSig1, bSig );
3135 q = ( 2 < q ) ? q - 2 : 0;
3136 q >>= 64 - expDiff;
3137 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3138 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3139 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3140 while ( le128( term0, term1, aSig0, aSig1 ) ) {
3141 ++q;
3142 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3143 }
3144 }
3145 else {
3146 term1 = 0;
3147 term0 = bSig;
3148 }
3149 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3150 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3151 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3152 && ( q & 1 ) )
3153 ) {
3154 aSig0 = alternateASig0;
3155 aSig1 = alternateASig1;
3156 zSign = ! zSign;
3157 }
Richard Purdief148af22005-08-03 19:49:17 +01003158
Linus Torvalds1da177e2005-04-16 15:20:36 -07003159 return
3160 normalizeRoundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003161 roundData, zSign, bExp + expDiff, aSig0, aSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003162
3163}
3164
3165/*
3166-------------------------------------------------------------------------------
3167Returns the square root of the extended double-precision floating-point
3168value `a'. The operation is performed according to the IEC/IEEE Standard
3169for Binary Floating-point Arithmetic.
3170-------------------------------------------------------------------------------
3171*/
Richard Purdief148af22005-08-03 19:49:17 +01003172floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
Linus Torvalds1da177e2005-04-16 15:20:36 -07003173{
3174 flag aSign;
3175 int32 aExp, zExp;
3176 bits64 aSig0, aSig1, zSig0, zSig1;
3177 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3178 bits64 shiftedRem0, shiftedRem1;
3179 floatx80 z;
3180
3181 aSig0 = extractFloatx80Frac( a );
3182 aExp = extractFloatx80Exp( a );
3183 aSign = extractFloatx80Sign( a );
3184 if ( aExp == 0x7FFF ) {
3185 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
3186 if ( ! aSign ) return a;
3187 goto invalid;
3188 }
3189 if ( aSign ) {
3190 if ( ( aExp | aSig0 ) == 0 ) return a;
3191 invalid:
Richard Purdief148af22005-08-03 19:49:17 +01003192 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003193 z.low = floatx80_default_nan_low;
3194 z.high = floatx80_default_nan_high;
3195 return z;
3196 }
3197 if ( aExp == 0 ) {
3198 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3199 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3200 }
3201 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3202 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3203 zSig0 <<= 31;
3204 aSig1 = 0;
3205 shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
3206 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
3207 if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
3208 shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
3209 mul64To128( zSig0, zSig0, &term0, &term1 );
3210 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3211 while ( (sbits64) rem0 < 0 ) {
3212 --zSig0;
3213 shortShift128Left( 0, zSig0, 1, &term0, &term1 );
3214 term1 |= 1;
3215 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
3216 }
3217 shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
3218 zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3219 if ( (bits64) ( zSig1<<1 ) <= 10 ) {
3220 if ( zSig1 == 0 ) zSig1 = 1;
3221 mul64To128( zSig0, zSig1, &term1, &term2 );
3222 shortShift128Left( term1, term2, 1, &term1, &term2 );
3223 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3224 mul64To128( zSig1, zSig1, &term2, &term3 );
3225 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3226 while ( (sbits64) rem1 < 0 ) {
3227 --zSig1;
3228 shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
3229 term3 |= 1;
3230 add192(
3231 rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3232 }
3233 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3234 }
3235 return
3236 roundAndPackFloatx80(
Richard Purdief148af22005-08-03 19:49:17 +01003237 roundData, 0, zExp, zSig0, zSig1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -07003238
3239}
3240
3241/*
3242-------------------------------------------------------------------------------
3243Returns 1 if the extended double-precision floating-point value `a' is
3244equal to the corresponding value `b', and 0 otherwise. The comparison is
3245performed according to the IEC/IEEE Standard for Binary Floating-point
3246Arithmetic.
3247-------------------------------------------------------------------------------
3248*/
3249flag floatx80_eq( floatx80 a, floatx80 b )
3250{
3251
3252 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3253 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3254 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3255 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3256 ) {
3257 if ( floatx80_is_signaling_nan( a )
3258 || floatx80_is_signaling_nan( b ) ) {
Richard Purdief148af22005-08-03 19:49:17 +01003259 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003260 }
3261 return 0;
3262 }
3263 return
3264 ( a.low == b.low )
3265 && ( ( a.high == b.high )
3266 || ( ( a.low == 0 )
3267 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3268 );
3269
3270}
3271
3272/*
3273-------------------------------------------------------------------------------
3274Returns 1 if the extended double-precision floating-point value `a' is
3275less than or equal to the corresponding value `b', and 0 otherwise. The
3276comparison is performed according to the IEC/IEEE Standard for Binary
3277Floating-point Arithmetic.
3278-------------------------------------------------------------------------------
3279*/
3280flag floatx80_le( floatx80 a, floatx80 b )
3281{
3282 flag aSign, bSign;
3283
3284 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3285 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3286 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3287 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3288 ) {
Richard Purdief148af22005-08-03 19:49:17 +01003289 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003290 return 0;
3291 }
3292 aSign = extractFloatx80Sign( a );
3293 bSign = extractFloatx80Sign( b );
3294 if ( aSign != bSign ) {
3295 return
3296 aSign
3297 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3298 == 0 );
3299 }
3300 return
3301 aSign ? le128( b.high, b.low, a.high, a.low )
3302 : le128( a.high, a.low, b.high, b.low );
3303
3304}
3305
3306/*
3307-------------------------------------------------------------------------------
3308Returns 1 if the extended double-precision floating-point value `a' is
3309less than the corresponding value `b', and 0 otherwise. The comparison
3310is performed according to the IEC/IEEE Standard for Binary Floating-point
3311Arithmetic.
3312-------------------------------------------------------------------------------
3313*/
3314flag floatx80_lt( floatx80 a, floatx80 b )
3315{
3316 flag aSign, bSign;
3317
3318 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3319 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3320 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3321 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3322 ) {
Richard Purdief148af22005-08-03 19:49:17 +01003323 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003324 return 0;
3325 }
3326 aSign = extractFloatx80Sign( a );
3327 bSign = extractFloatx80Sign( b );
3328 if ( aSign != bSign ) {
3329 return
3330 aSign
3331 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3332 != 0 );
3333 }
3334 return
3335 aSign ? lt128( b.high, b.low, a.high, a.low )
3336 : lt128( a.high, a.low, b.high, b.low );
3337
3338}
3339
3340/*
3341-------------------------------------------------------------------------------
3342Returns 1 if the extended double-precision floating-point value `a' is equal
3343to the corresponding value `b', and 0 otherwise. The invalid exception is
3344raised if either operand is a NaN. Otherwise, the comparison is performed
3345according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3346-------------------------------------------------------------------------------
3347*/
3348flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3349{
3350
3351 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3352 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3353 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3354 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3355 ) {
Richard Purdief148af22005-08-03 19:49:17 +01003356 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003357 return 0;
3358 }
3359 return
3360 ( a.low == b.low )
3361 && ( ( a.high == b.high )
3362 || ( ( a.low == 0 )
3363 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3364 );
3365
3366}
3367
3368/*
3369-------------------------------------------------------------------------------
3370Returns 1 if the extended double-precision floating-point value `a' is less
3371than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
3372do not cause an exception. Otherwise, the comparison is performed according
3373to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3374-------------------------------------------------------------------------------
3375*/
3376flag floatx80_le_quiet( floatx80 a, floatx80 b )
3377{
3378 flag aSign, bSign;
3379
3380 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3381 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3382 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3383 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3384 ) {
3385 if ( floatx80_is_signaling_nan( a )
3386 || floatx80_is_signaling_nan( b ) ) {
Richard Purdief148af22005-08-03 19:49:17 +01003387 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003388 }
3389 return 0;
3390 }
3391 aSign = extractFloatx80Sign( a );
3392 bSign = extractFloatx80Sign( b );
3393 if ( aSign != bSign ) {
3394 return
3395 aSign
3396 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3397 == 0 );
3398 }
3399 return
3400 aSign ? le128( b.high, b.low, a.high, a.low )
3401 : le128( a.high, a.low, b.high, b.low );
3402
3403}
3404
3405/*
3406-------------------------------------------------------------------------------
3407Returns 1 if the extended double-precision floating-point value `a' is less
3408than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
3409an exception. Otherwise, the comparison is performed according to the
3410IEC/IEEE Standard for Binary Floating-point Arithmetic.
3411-------------------------------------------------------------------------------
3412*/
3413flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3414{
3415 flag aSign, bSign;
3416
3417 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3418 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3419 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3420 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3421 ) {
3422 if ( floatx80_is_signaling_nan( a )
3423 || floatx80_is_signaling_nan( b ) ) {
Richard Purdief148af22005-08-03 19:49:17 +01003424 roundData->exception |= float_flag_invalid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003425 }
3426 return 0;
3427 }
3428 aSign = extractFloatx80Sign( a );
3429 bSign = extractFloatx80Sign( b );
3430 if ( aSign != bSign ) {
3431 return
3432 aSign
3433 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3434 != 0 );
3435 }
3436 return
3437 aSign ? lt128( b.high, b.low, a.high, a.low )
3438 : lt128( a.high, a.low, b.high, b.low );
3439
3440}
3441
3442#endif
3443