blob: cf2a6173149e3e5653618dba5f4c102950774329 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001
2/*
3===============================================================================
4
5This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6Arithmetic Package, Release 2.
7
8Written by John R. Hauser. This work was made possible in part by the
9International Computer Science Institute, located at Suite 600, 1947 Center
10Street, Berkeley, California 94704. Funding was partially provided by the
11National Science Foundation under grant MIP-9311980. The original version
12of this code was written as part of a project to build a fixed-point vector
13processor in collaboration with the University of California at Berkeley,
14overseen by Profs. Nelson Morgan and John Wawrzynek. More information
Justin P. Mattock50a23e62010-10-16 10:36:23 -070015is available through the web page
16http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
Linus Torvalds1da177e2005-04-16 15:20:36 -070017
18THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23
24Derivative works are acceptable, even for commercial purposes, so long as
25(1) they include prominent notice that the work is derivative, and (2) they
26include prominent notice akin to these three paragraphs for those parts of
27this code that are retained.
28
29===============================================================================
30*/
31
32/*
33-------------------------------------------------------------------------------
34Shifts `a' right by the number of bits given in `count'. If any nonzero
35bits are shifted off, they are ``jammed'' into the least significant bit of
36the result by setting the least significant bit to 1. The value of `count'
37can be arbitrarily large; in particular, if `count' is greater than 32, the
38result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39The result is stored in the location pointed to by `zPtr'.
40-------------------------------------------------------------------------------
41*/
42INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43{
44 bits32 z;
45 if ( count == 0 ) {
46 z = a;
47 }
48 else if ( count < 32 ) {
49 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
50 }
51 else {
52 z = ( a != 0 );
53 }
54 *zPtr = z;
55}
56
57/*
58-------------------------------------------------------------------------------
59Shifts `a' right by the number of bits given in `count'. If any nonzero
60bits are shifted off, they are ``jammed'' into the least significant bit of
61the result by setting the least significant bit to 1. The value of `count'
62can be arbitrarily large; in particular, if `count' is greater than 64, the
63result will be either 0 or 1, depending on whether `a' is zero or nonzero.
64The result is stored in the location pointed to by `zPtr'.
65-------------------------------------------------------------------------------
66*/
67INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
68{
69 bits64 z;
70
71 __asm__("@shift64RightJamming -- start");
72 if ( count == 0 ) {
73 z = a;
74 }
75 else if ( count < 64 ) {
76 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
77 }
78 else {
79 z = ( a != 0 );
80 }
81 __asm__("@shift64RightJamming -- end");
82 *zPtr = z;
83}
84
85/*
86-------------------------------------------------------------------------------
87Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88_plus_ the number of bits given in `count'. The shifted result is at most
8964 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
90bits shifted off form a second 64-bit result as follows: The _last_ bit
91shifted off is the most-significant bit of the extra result, and the other
9263 bits of the extra result are all zero if and only if _all_but_the_last_
93bits shifted off were all zero. This extra result is stored in the location
94pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
95 (This routine makes more sense if `a0' and `a1' are considered to form a
96fixed-point value with binary point between `a0' and `a1'. This fixed-point
97value is shifted right by the number of bits given in `count', and the
98integer part of the result is returned at the location pointed to by
99`z0Ptr'. The fractional part of the result may be slightly corrupted as
100described above, and is returned at the location pointed to by `z1Ptr'.)
101-------------------------------------------------------------------------------
102*/
103INLINE void
104 shift64ExtraRightJamming(
105 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
106{
107 bits64 z0, z1;
108 int8 negCount = ( - count ) & 63;
109
110 if ( count == 0 ) {
111 z1 = a1;
112 z0 = a0;
113 }
114 else if ( count < 64 ) {
115 z1 = ( a0<<negCount ) | ( a1 != 0 );
116 z0 = a0>>count;
117 }
118 else {
119 if ( count == 64 ) {
120 z1 = a0 | ( a1 != 0 );
121 }
122 else {
123 z1 = ( ( a0 | a1 ) != 0 );
124 }
125 z0 = 0;
126 }
127 *z1Ptr = z1;
128 *z0Ptr = z0;
129
130}
131
132/*
133-------------------------------------------------------------------------------
134Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135number of bits given in `count'. Any bits shifted off are lost. The value
136of `count' can be arbitrarily large; in particular, if `count' is greater
137than 128, the result will be 0. The result is broken into two 64-bit pieces
138which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139-------------------------------------------------------------------------------
140*/
141INLINE void
142 shift128Right(
143 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
144{
145 bits64 z0, z1;
146 int8 negCount = ( - count ) & 63;
147
148 if ( count == 0 ) {
149 z1 = a1;
150 z0 = a0;
151 }
152 else if ( count < 64 ) {
153 z1 = ( a0<<negCount ) | ( a1>>count );
154 z0 = a0>>count;
155 }
156 else {
157 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
158 z0 = 0;
159 }
160 *z1Ptr = z1;
161 *z0Ptr = z0;
162
163}
164
165/*
166-------------------------------------------------------------------------------
167Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168number of bits given in `count'. If any nonzero bits are shifted off, they
169are ``jammed'' into the least significant bit of the result by setting the
170least significant bit to 1. The value of `count' can be arbitrarily large;
171in particular, if `count' is greater than 128, the result will be either 0
172or 1, depending on whether the concatenation of `a0' and `a1' is zero or
173nonzero. The result is broken into two 64-bit pieces which are stored at
174the locations pointed to by `z0Ptr' and `z1Ptr'.
175-------------------------------------------------------------------------------
176*/
177INLINE void
178 shift128RightJamming(
179 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
180{
181 bits64 z0, z1;
182 int8 negCount = ( - count ) & 63;
183
184 if ( count == 0 ) {
185 z1 = a1;
186 z0 = a0;
187 }
188 else if ( count < 64 ) {
189 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
190 z0 = a0>>count;
191 }
192 else {
193 if ( count == 64 ) {
194 z1 = a0 | ( a1 != 0 );
195 }
196 else if ( count < 128 ) {
197 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
198 }
199 else {
200 z1 = ( ( a0 | a1 ) != 0 );
201 }
202 z0 = 0;
203 }
204 *z1Ptr = z1;
205 *z0Ptr = z0;
206
207}
208
209/*
210-------------------------------------------------------------------------------
211Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212by 64 _plus_ the number of bits given in `count'. The shifted result is
213at most 128 nonzero bits; these are broken into two 64-bit pieces which are
214stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
215off form a third 64-bit result as follows: The _last_ bit shifted off is
216the most-significant bit of the extra result, and the other 63 bits of the
217extra result are all zero if and only if _all_but_the_last_ bits shifted off
218were all zero. This extra result is stored in the location pointed to by
219`z2Ptr'. The value of `count' can be arbitrarily large.
220 (This routine makes more sense if `a0', `a1', and `a2' are considered
221to form a fixed-point value with binary point between `a1' and `a2'. This
222fixed-point value is shifted right by the number of bits given in `count',
223and the integer part of the result is returned at the locations pointed to
224by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
225corrupted as described above, and is returned at the location pointed to by
226`z2Ptr'.)
227-------------------------------------------------------------------------------
228*/
229INLINE void
230 shift128ExtraRightJamming(
231 bits64 a0,
232 bits64 a1,
233 bits64 a2,
234 int16 count,
235 bits64 *z0Ptr,
236 bits64 *z1Ptr,
237 bits64 *z2Ptr
238 )
239{
240 bits64 z0, z1, z2;
241 int8 negCount = ( - count ) & 63;
242
243 if ( count == 0 ) {
244 z2 = a2;
245 z1 = a1;
246 z0 = a0;
247 }
248 else {
249 if ( count < 64 ) {
250 z2 = a1<<negCount;
251 z1 = ( a0<<negCount ) | ( a1>>count );
252 z0 = a0>>count;
253 }
254 else {
255 if ( count == 64 ) {
256 z2 = a1;
257 z1 = a0;
258 }
259 else {
260 a2 |= a1;
261 if ( count < 128 ) {
262 z2 = a0<<negCount;
263 z1 = a0>>( count & 63 );
264 }
265 else {
266 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
267 z1 = 0;
268 }
269 }
270 z0 = 0;
271 }
272 z2 |= ( a2 != 0 );
273 }
274 *z2Ptr = z2;
275 *z1Ptr = z1;
276 *z0Ptr = z0;
277
278}
279
280/*
281-------------------------------------------------------------------------------
282Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283number of bits given in `count'. Any bits shifted off are lost. The value
284of `count' must be less than 64. The result is broken into two 64-bit
285pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286-------------------------------------------------------------------------------
287*/
288INLINE void
289 shortShift128Left(
290 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291{
292
293 *z1Ptr = a1<<count;
294 *z0Ptr =
295 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296
297}
298
299/*
300-------------------------------------------------------------------------------
301Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302by the number of bits given in `count'. Any bits shifted off are lost.
303The value of `count' must be less than 64. The result is broken into three
30464-bit pieces which are stored at the locations pointed to by `z0Ptr',
305`z1Ptr', and `z2Ptr'.
306-------------------------------------------------------------------------------
307*/
308INLINE void
309 shortShift192Left(
310 bits64 a0,
311 bits64 a1,
312 bits64 a2,
313 int16 count,
314 bits64 *z0Ptr,
315 bits64 *z1Ptr,
316 bits64 *z2Ptr
317 )
318{
319 bits64 z0, z1, z2;
320 int8 negCount;
321
322 z2 = a2<<count;
323 z1 = a1<<count;
324 z0 = a0<<count;
325 if ( 0 < count ) {
326 negCount = ( ( - count ) & 63 );
327 z1 |= a2>>negCount;
328 z0 |= a1>>negCount;
329 }
330 *z2Ptr = z2;
331 *z1Ptr = z1;
332 *z0Ptr = z0;
333
334}
335
336/*
337-------------------------------------------------------------------------------
338Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
340any carry out is lost. The result is broken into two 64-bit pieces which
341are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342-------------------------------------------------------------------------------
343*/
344INLINE void
345 add128(
346 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
347{
348 bits64 z1;
349
350 z1 = a1 + b1;
351 *z1Ptr = z1;
352 *z0Ptr = a0 + b0 + ( z1 < a1 );
353
354}
355
356/*
357-------------------------------------------------------------------------------
358Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
360modulo 2^192, so any carry out is lost. The result is broken into three
36164-bit pieces which are stored at the locations pointed to by `z0Ptr',
362`z1Ptr', and `z2Ptr'.
363-------------------------------------------------------------------------------
364*/
365INLINE void
366 add192(
367 bits64 a0,
368 bits64 a1,
369 bits64 a2,
370 bits64 b0,
371 bits64 b1,
372 bits64 b2,
373 bits64 *z0Ptr,
374 bits64 *z1Ptr,
375 bits64 *z2Ptr
376 )
377{
378 bits64 z0, z1, z2;
379 int8 carry0, carry1;
380
381 z2 = a2 + b2;
382 carry1 = ( z2 < a2 );
383 z1 = a1 + b1;
384 carry0 = ( z1 < a1 );
385 z0 = a0 + b0;
386 z1 += carry1;
387 z0 += ( z1 < carry1 );
388 z0 += carry0;
389 *z2Ptr = z2;
390 *z1Ptr = z1;
391 *z0Ptr = z0;
392
393}
394
395/*
396-------------------------------------------------------------------------------
397Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
3992^128, so any borrow out (carry out) is lost. The result is broken into two
40064-bit pieces which are stored at the locations pointed to by `z0Ptr' and
401`z1Ptr'.
402-------------------------------------------------------------------------------
403*/
404INLINE void
405 sub128(
406 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
407{
408
409 *z1Ptr = a1 - b1;
410 *z0Ptr = a0 - b0 - ( a1 < b1 );
411
412}
413
414/*
415-------------------------------------------------------------------------------
416Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
419result is broken into three 64-bit pieces which are stored at the locations
420pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421-------------------------------------------------------------------------------
422*/
423INLINE void
424 sub192(
425 bits64 a0,
426 bits64 a1,
427 bits64 a2,
428 bits64 b0,
429 bits64 b1,
430 bits64 b2,
431 bits64 *z0Ptr,
432 bits64 *z1Ptr,
433 bits64 *z2Ptr
434 )
435{
436 bits64 z0, z1, z2;
437 int8 borrow0, borrow1;
438
439 z2 = a2 - b2;
440 borrow1 = ( a2 < b2 );
441 z1 = a1 - b1;
442 borrow0 = ( a1 < b1 );
443 z0 = a0 - b0;
444 z0 -= ( z1 < borrow1 );
445 z1 -= borrow1;
446 z0 -= borrow0;
447 *z2Ptr = z2;
448 *z1Ptr = z1;
449 *z0Ptr = z0;
450
451}
452
453/*
454-------------------------------------------------------------------------------
455Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
456into two 64-bit pieces which are stored at the locations pointed to by
457`z0Ptr' and `z1Ptr'.
458-------------------------------------------------------------------------------
459*/
460INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
461{
462 bits32 aHigh, aLow, bHigh, bLow;
463 bits64 z0, zMiddleA, zMiddleB, z1;
464
465 aLow = a;
466 aHigh = a>>32;
467 bLow = b;
468 bHigh = b>>32;
469 z1 = ( (bits64) aLow ) * bLow;
470 zMiddleA = ( (bits64) aLow ) * bHigh;
471 zMiddleB = ( (bits64) aHigh ) * bLow;
472 z0 = ( (bits64) aHigh ) * bHigh;
473 zMiddleA += zMiddleB;
474 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
475 zMiddleA <<= 32;
476 z1 += zMiddleA;
477 z0 += ( z1 < zMiddleA );
478 *z1Ptr = z1;
479 *z0Ptr = z0;
480
481}
482
483/*
484-------------------------------------------------------------------------------
485Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486obtain a 192-bit product. The product is broken into three 64-bit pieces
487which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
488`z2Ptr'.
489-------------------------------------------------------------------------------
490*/
491INLINE void
492 mul128By64To192(
493 bits64 a0,
494 bits64 a1,
495 bits64 b,
496 bits64 *z0Ptr,
497 bits64 *z1Ptr,
498 bits64 *z2Ptr
499 )
500{
501 bits64 z0, z1, z2, more1;
502
503 mul64To128( a1, b, &z1, &z2 );
504 mul64To128( a0, b, &z0, &more1 );
505 add128( z0, more1, 0, z1, &z0, &z1 );
506 *z2Ptr = z2;
507 *z1Ptr = z1;
508 *z0Ptr = z0;
509
510}
511
512/*
513-------------------------------------------------------------------------------
514Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516product. The product is broken into four 64-bit pieces which are stored at
517the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518-------------------------------------------------------------------------------
519*/
520INLINE void
521 mul128To256(
522 bits64 a0,
523 bits64 a1,
524 bits64 b0,
525 bits64 b1,
526 bits64 *z0Ptr,
527 bits64 *z1Ptr,
528 bits64 *z2Ptr,
529 bits64 *z3Ptr
530 )
531{
532 bits64 z0, z1, z2, z3;
533 bits64 more1, more2;
534
535 mul64To128( a1, b1, &z2, &z3 );
536 mul64To128( a1, b0, &z1, &more2 );
537 add128( z1, more2, 0, z2, &z1, &z2 );
538 mul64To128( a0, b0, &z0, &more1 );
539 add128( z0, more1, 0, z1, &z0, &z1 );
540 mul64To128( a0, b1, &more1, &more2 );
541 add128( more1, more2, 0, z2, &more1, &z2 );
542 add128( z0, z1, 0, more1, &z0, &z1 );
543 *z3Ptr = z3;
544 *z2Ptr = z2;
545 *z1Ptr = z1;
546 *z0Ptr = z0;
547
548}
549
550/*
551-------------------------------------------------------------------------------
552Returns an approximation to the 64-bit integer quotient obtained by dividing
553`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
554divisor `b' must be at least 2^63. If q is the exact quotient truncated
555toward zero, the approximation returned lies between q and q + 2 inclusive.
556If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557unsigned integer is returned.
558-------------------------------------------------------------------------------
559*/
560static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
561{
562 bits64 b0, b1;
563 bits64 rem0, rem1, term0, term1;
564 bits64 z;
565 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +0100566 b0 = b>>32; /* hence b0 is 32 bits wide now */
567 if ( b0<<32 <= a0 ) {
568 z = LIT64( 0xFFFFFFFF00000000 );
569 } else {
570 z = a0;
571 do_div( z, b0 );
572 z <<= 32;
573 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 mul64To128( b, z, &term0, &term1 );
575 sub128( a0, a1, term0, term1, &rem0, &rem1 );
576 while ( ( (sbits64) rem0 ) < 0 ) {
577 z -= LIT64( 0x100000000 );
578 b1 = b<<32;
579 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
580 }
581 rem0 = ( rem0<<32 ) | ( rem1>>32 );
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +0100582 if ( b0<<32 <= rem0 ) {
583 z |= 0xFFFFFFFF;
584 } else {
585 do_div( rem0, b0 );
586 z |= rem0;
587 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return z;
589
590}
591
592/*
593-------------------------------------------------------------------------------
594Returns an approximation to the square root of the 32-bit significand given
595by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
596`aExp' (the least significant bit) is 1, the integer returned approximates
5972^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
598is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
599case, the approximation returned lies strictly within +/-2 of the exact
600value.
601-------------------------------------------------------------------------------
602*/
603static bits32 estimateSqrt32( int16 aExp, bits32 a )
604{
605 static const bits16 sqrtOddAdjustments[] = {
606 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
607 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
608 };
609 static const bits16 sqrtEvenAdjustments[] = {
610 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
611 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
612 };
613 int8 index;
614 bits32 z;
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +0100615 bits64 A;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
617 index = ( a>>27 ) & 15;
618 if ( aExp & 1 ) {
619 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
620 z = ( ( a / z )<<14 ) + ( z<<15 );
621 a >>= 1;
622 }
623 else {
624 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
625 z = a / z + z;
626 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
627 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
628 }
Nicolas Pitrec1241c4c2005-06-23 21:56:46 +0100629 A = ( (bits64) a )<<31;
630 do_div( A, z );
631 return ( (bits32) A ) + ( z>>1 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
633}
634
635/*
636-------------------------------------------------------------------------------
637Returns the number of leading 0 bits before the most-significant 1 bit
638of `a'. If `a' is zero, 32 is returned.
639-------------------------------------------------------------------------------
640*/
641static int8 countLeadingZeros32( bits32 a )
642{
643 static const int8 countLeadingZerosHigh[] = {
644 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
646 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
647 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
648 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
649 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
650 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
660 };
661 int8 shiftCount;
662
663 shiftCount = 0;
664 if ( a < 0x10000 ) {
665 shiftCount += 16;
666 a <<= 16;
667 }
668 if ( a < 0x1000000 ) {
669 shiftCount += 8;
670 a <<= 8;
671 }
672 shiftCount += countLeadingZerosHigh[ a>>24 ];
673 return shiftCount;
674
675}
676
677/*
678-------------------------------------------------------------------------------
679Returns the number of leading 0 bits before the most-significant 1 bit
680of `a'. If `a' is zero, 64 is returned.
681-------------------------------------------------------------------------------
682*/
683static int8 countLeadingZeros64( bits64 a )
684{
685 int8 shiftCount;
686
687 shiftCount = 0;
688 if ( a < ( (bits64) 1 )<<32 ) {
689 shiftCount += 32;
690 }
691 else {
692 a >>= 32;
693 }
694 shiftCount += countLeadingZeros32( a );
695 return shiftCount;
696
697}
698
699/*
700-------------------------------------------------------------------------------
701Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
702is equal to the 128-bit value formed by concatenating `b0' and `b1'.
703Otherwise, returns 0.
704-------------------------------------------------------------------------------
705*/
706INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707{
708
709 return ( a0 == b0 ) && ( a1 == b1 );
710
711}
712
713/*
714-------------------------------------------------------------------------------
715Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
717Otherwise, returns 0.
718-------------------------------------------------------------------------------
719*/
720INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
721{
722
723 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
724
725}
726
727/*
728-------------------------------------------------------------------------------
729Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
730than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
731returns 0.
732-------------------------------------------------------------------------------
733*/
734INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
735{
736
737 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
738
739}
740
741/*
742-------------------------------------------------------------------------------
743Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
744not equal to the 128-bit value formed by concatenating `b0' and `b1'.
745Otherwise, returns 0.
746-------------------------------------------------------------------------------
747*/
748INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
749{
750
751 return ( a0 != b0 ) || ( a1 != b1 );
752
753}
754