Blame - arch/arm/nwfpe/softfloat-macros - kernel/msm-4.9

blob: 5a060f95a58f1df2b6439858322e2e53b081ddba [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1
				2	/*
				3	===============================================================================
				4
				5	This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
				6	Arithmetic Package, Release 2.
				7
				8	Written by John R. Hauser. This work was made possible in part by the
				9	International Computer Science Institute, located at Suite 600, 1947 Center
				10	Street, Berkeley, California 94704. Funding was partially provided by the
				11	National Science Foundation under grant MIP-9311980. The original version
				12	of this code was written as part of a project to build a fixed-point vector
				13	processor in collaboration with the University of California at Berkeley,
				14	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				15	is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				16	arithmetic/softfloat.html'.
				17
				18	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				19	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				20	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				21	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				22	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				23
				24	Derivative works are acceptable, even for commercial purposes, so long as
				25	(1) they include prominent notice that the work is derivative, and (2) they
				26	include prominent notice akin to these three paragraphs for those parts of
				27	this code that are retained.
				28
				29	===============================================================================
				30	*/
				31
				32	/*
				33	-------------------------------------------------------------------------------
				34	Shifts `a' right by the number of bits given in `count'. If any nonzero
				35	bits are shifted off, they are ``jammed'' into the least significant bit of
				36	the result by setting the least significant bit to 1. The value of `count'
				37	can be arbitrarily large; in particular, if `count' is greater than 32, the
				38	result will be either 0 or 1, depending on whether `a' is zero or nonzero.
				39	The result is stored in the location pointed to by `zPtr'.
				40	-------------------------------------------------------------------------------
				41	*/
				42	INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
				43	{
				44	bits32 z;
				45	if ( count == 0 ) {
				46	z = a;
				47	}
				48	else if ( count < 32 ) {
				49	z = ( a>>count ) \| ( ( a<<( ( - count ) & 31 ) ) != 0 );
				50	}
				51	else {
				52	z = ( a != 0 );
				53	}
				54	*zPtr = z;
				55	}
				56
				57	/*
				58	-------------------------------------------------------------------------------
				59	Shifts `a' right by the number of bits given in `count'. If any nonzero
				60	bits are shifted off, they are ``jammed'' into the least significant bit of
				61	the result by setting the least significant bit to 1. The value of `count'
				62	can be arbitrarily large; in particular, if `count' is greater than 64, the
				63	result will be either 0 or 1, depending on whether `a' is zero or nonzero.
				64	The result is stored in the location pointed to by `zPtr'.
				65	-------------------------------------------------------------------------------
				66	*/
				67	INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
				68	{
				69	bits64 z;
				70
				71	__asm__("@shift64RightJamming -- start");
				72	if ( count == 0 ) {
				73	z = a;
				74	}
				75	else if ( count < 64 ) {
				76	z = ( a>>count ) \| ( ( a<<( ( - count ) & 63 ) ) != 0 );
				77	}
				78	else {
				79	z = ( a != 0 );
				80	}
				81	__asm__("@shift64RightJamming -- end");
				82	*zPtr = z;
				83	}
				84
				85	/*
				86	-------------------------------------------------------------------------------
				87	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
				88	_plus_ the number of bits given in `count'. The shifted result is at most
				89	64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
				90	bits shifted off form a second 64-bit result as follows: The _last_ bit
				91	shifted off is the most-significant bit of the extra result, and the other
				92	63 bits of the extra result are all zero if and only if _all_but_the_last_
				93	bits shifted off were all zero. This extra result is stored in the location
				94	pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
				95	(This routine makes more sense if `a0' and `a1' are considered to form a
				96	fixed-point value with binary point between `a0' and `a1'. This fixed-point
				97	value is shifted right by the number of bits given in `count', and the
				98	integer part of the result is returned at the location pointed to by
				99	`z0Ptr'. The fractional part of the result may be slightly corrupted as
				100	described above, and is returned at the location pointed to by `z1Ptr'.)
				101	-------------------------------------------------------------------------------
				102	*/
				103	INLINE void
				104	shift64ExtraRightJamming(
				105	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
				106	{
				107	bits64 z0, z1;
				108	int8 negCount = ( - count ) & 63;
				109
				110	if ( count == 0 ) {
				111	z1 = a1;
				112	z0 = a0;
				113	}
				114	else if ( count < 64 ) {
				115	z1 = ( a0<<negCount ) \| ( a1 != 0 );
				116	z0 = a0>>count;
				117	}
				118	else {
				119	if ( count == 64 ) {
				120	z1 = a0 \| ( a1 != 0 );
				121	}
				122	else {
				123	z1 = ( ( a0 \| a1 ) != 0 );
				124	}
				125	z0 = 0;
				126	}
				127	*z1Ptr = z1;
				128	*z0Ptr = z0;
				129
				130	}
				131
				132	/*
				133	-------------------------------------------------------------------------------
				134	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
				135	number of bits given in `count'. Any bits shifted off are lost. The value
				136	of `count' can be arbitrarily large; in particular, if `count' is greater
				137	than 128, the result will be 0. The result is broken into two 64-bit pieces
				138	which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
				139	-------------------------------------------------------------------------------
				140	*/
				141	INLINE void
				142	shift128Right(
				143	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
				144	{
				145	bits64 z0, z1;
				146	int8 negCount = ( - count ) & 63;
				147
				148	if ( count == 0 ) {
				149	z1 = a1;
				150	z0 = a0;
				151	}
				152	else if ( count < 64 ) {
				153	z1 = ( a0<<negCount ) \| ( a1>>count );
				154	z0 = a0>>count;
				155	}
				156	else {
				157	z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
				158	z0 = 0;
				159	}
				160	*z1Ptr = z1;
				161	*z0Ptr = z0;
				162
				163	}
				164
				165	/*
				166	-------------------------------------------------------------------------------
				167	Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
				168	number of bits given in `count'. If any nonzero bits are shifted off, they
				169	are ``jammed'' into the least significant bit of the result by setting the
				170	least significant bit to 1. The value of `count' can be arbitrarily large;
				171	in particular, if `count' is greater than 128, the result will be either 0
				172	or 1, depending on whether the concatenation of `a0' and `a1' is zero or
				173	nonzero. The result is broken into two 64-bit pieces which are stored at
				174	the locations pointed to by `z0Ptr' and `z1Ptr'.
				175	-------------------------------------------------------------------------------
				176	*/
				177	INLINE void
				178	shift128RightJamming(
				179	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
				180	{
				181	bits64 z0, z1;
				182	int8 negCount = ( - count ) & 63;
				183
				184	if ( count == 0 ) {
				185	z1 = a1;
				186	z0 = a0;
				187	}
				188	else if ( count < 64 ) {
				189	z1 = ( a0<<negCount ) \| ( a1>>count ) \| ( ( a1<<negCount ) != 0 );
				190	z0 = a0>>count;
				191	}
				192	else {
				193	if ( count == 64 ) {
				194	z1 = a0 \| ( a1 != 0 );
				195	}
				196	else if ( count < 128 ) {
				197	z1 = ( a0>>( count & 63 ) ) \| ( ( ( a0<<negCount ) \| a1 ) != 0 );
				198	}
				199	else {
				200	z1 = ( ( a0 \| a1 ) != 0 );
				201	}
				202	z0 = 0;
				203	}
				204	*z1Ptr = z1;
				205	*z0Ptr = z0;
				206
				207	}
				208
				209	/*
				210	-------------------------------------------------------------------------------
				211	Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
				212	by 64 _plus_ the number of bits given in `count'. The shifted result is
				213	at most 128 nonzero bits; these are broken into two 64-bit pieces which are
				214	stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
				215	off form a third 64-bit result as follows: The _last_ bit shifted off is
				216	the most-significant bit of the extra result, and the other 63 bits of the
				217	extra result are all zero if and only if _all_but_the_last_ bits shifted off
				218	were all zero. This extra result is stored in the location pointed to by
				219	`z2Ptr'. The value of `count' can be arbitrarily large.
				220	(This routine makes more sense if `a0', `a1', and `a2' are considered
				221	to form a fixed-point value with binary point between `a1' and `a2'. This
				222	fixed-point value is shifted right by the number of bits given in `count',
				223	and the integer part of the result is returned at the locations pointed to
				224	by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
				225	corrupted as described above, and is returned at the location pointed to by
				226	`z2Ptr'.)
				227	-------------------------------------------------------------------------------
				228	*/
				229	INLINE void
				230	shift128ExtraRightJamming(
				231	bits64 a0,
				232	bits64 a1,
				233	bits64 a2,
				234	int16 count,
				235	bits64 *z0Ptr,
				236	bits64 *z1Ptr,
				237	bits64 *z2Ptr
				238	)
				239	{
				240	bits64 z0, z1, z2;
				241	int8 negCount = ( - count ) & 63;
				242
				243	if ( count == 0 ) {
				244	z2 = a2;
				245	z1 = a1;
				246	z0 = a0;
				247	}
				248	else {
				249	if ( count < 64 ) {
				250	z2 = a1<<negCount;
				251	z1 = ( a0<<negCount ) \| ( a1>>count );
				252	z0 = a0>>count;
				253	}
				254	else {
				255	if ( count == 64 ) {
				256	z2 = a1;
				257	z1 = a0;
				258	}
				259	else {
				260	a2 \|= a1;
				261	if ( count < 128 ) {
				262	z2 = a0<<negCount;
				263	z1 = a0>>( count & 63 );
				264	}
				265	else {
				266	z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
				267	z1 = 0;
				268	}
				269	}
				270	z0 = 0;
				271	}
				272	z2 \|= ( a2 != 0 );
				273	}
				274	*z2Ptr = z2;
				275	*z1Ptr = z1;
				276	*z0Ptr = z0;
				277
				278	}
				279
				280	/*
				281	-------------------------------------------------------------------------------
				282	Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
				283	number of bits given in `count'. Any bits shifted off are lost. The value
				284	of `count' must be less than 64. The result is broken into two 64-bit
				285	pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
				286	-------------------------------------------------------------------------------
				287	*/
				288	INLINE void
				289	shortShift128Left(
				290	bits64 a0, bits64 a1, int16 count, bits64 z0Ptr, bits64 z1Ptr )
				291	{
				292
				293	*z1Ptr = a1<<count;
				294	*z0Ptr =
				295	( count == 0 ) ? a0 : ( a0<<count ) \| ( a1>>( ( - count ) & 63 ) );
				296
				297	}
				298
				299	/*
				300	-------------------------------------------------------------------------------
				301	Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
				302	by the number of bits given in `count'. Any bits shifted off are lost.
				303	The value of `count' must be less than 64. The result is broken into three
				304	64-bit pieces which are stored at the locations pointed to by `z0Ptr',
				305	`z1Ptr', and `z2Ptr'.
				306	-------------------------------------------------------------------------------
				307	*/
				308	INLINE void
				309	shortShift192Left(
				310	bits64 a0,
				311	bits64 a1,
				312	bits64 a2,
				313	int16 count,
				314	bits64 *z0Ptr,
				315	bits64 *z1Ptr,
				316	bits64 *z2Ptr
				317	)
				318	{
				319	bits64 z0, z1, z2;
				320	int8 negCount;
				321
				322	z2 = a2<<count;
				323	z1 = a1<<count;
				324	z0 = a0<<count;
				325	if ( 0 < count ) {
				326	negCount = ( ( - count ) & 63 );
				327	z1 \|= a2>>negCount;
				328	z0 \|= a1>>negCount;
				329	}
				330	*z2Ptr = z2;
				331	*z1Ptr = z1;
				332	*z0Ptr = z0;
				333
				334	}
				335
				336	/*
				337	-------------------------------------------------------------------------------
				338	Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
				339	value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
				340	any carry out is lost. The result is broken into two 64-bit pieces which
				341	are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
				342	-------------------------------------------------------------------------------
				343	*/
				344	INLINE void
				345	add128(
				346	bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 z0Ptr, bits64 z1Ptr )
				347	{
				348	bits64 z1;
				349
				350	z1 = a1 + b1;
				351	*z1Ptr = z1;
				352	*z0Ptr = a0 + b0 + ( z1 < a1 );
				353
				354	}
				355
				356	/*
				357	-------------------------------------------------------------------------------
				358	Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
				359	192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
				360	modulo 2^192, so any carry out is lost. The result is broken into three
				361	64-bit pieces which are stored at the locations pointed to by `z0Ptr',
				362	`z1Ptr', and `z2Ptr'.
				363	-------------------------------------------------------------------------------
				364	*/
				365	INLINE void
				366	add192(
				367	bits64 a0,
				368	bits64 a1,
				369	bits64 a2,
				370	bits64 b0,
				371	bits64 b1,
				372	bits64 b2,
				373	bits64 *z0Ptr,
				374	bits64 *z1Ptr,
				375	bits64 *z2Ptr
				376	)
				377	{
				378	bits64 z0, z1, z2;
				379	int8 carry0, carry1;
				380
				381	z2 = a2 + b2;
				382	carry1 = ( z2 < a2 );
				383	z1 = a1 + b1;
				384	carry0 = ( z1 < a1 );
				385	z0 = a0 + b0;
				386	z1 += carry1;
				387	z0 += ( z1 < carry1 );
				388	z0 += carry0;
				389	*z2Ptr = z2;
				390	*z1Ptr = z1;
				391	*z0Ptr = z0;
				392
				393	}
				394
				395	/*
				396	-------------------------------------------------------------------------------
				397	Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
				398	128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
				399	2^128, so any borrow out (carry out) is lost. The result is broken into two
				400	64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
				401	`z1Ptr'.
				402	-------------------------------------------------------------------------------
				403	*/
				404	INLINE void
				405	sub128(
				406	bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 z0Ptr, bits64 z1Ptr )
				407	{
				408
				409	*z1Ptr = a1 - b1;
				410	*z0Ptr = a0 - b0 - ( a1 < b1 );
				411
				412	}
				413
				414	/*
				415	-------------------------------------------------------------------------------
				416	Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
				417	from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
				418	Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
				419	result is broken into three 64-bit pieces which are stored at the locations
				420	pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
				421	-------------------------------------------------------------------------------
				422	*/
				423	INLINE void
				424	sub192(
				425	bits64 a0,
				426	bits64 a1,
				427	bits64 a2,
				428	bits64 b0,
				429	bits64 b1,
				430	bits64 b2,
				431	bits64 *z0Ptr,
				432	bits64 *z1Ptr,
				433	bits64 *z2Ptr
				434	)
				435	{
				436	bits64 z0, z1, z2;
				437	int8 borrow0, borrow1;
				438
				439	z2 = a2 - b2;
				440	borrow1 = ( a2 < b2 );
				441	z1 = a1 - b1;
				442	borrow0 = ( a1 < b1 );
				443	z0 = a0 - b0;
				444	z0 -= ( z1 < borrow1 );
				445	z1 -= borrow1;
				446	z0 -= borrow0;
				447	*z2Ptr = z2;
				448	*z1Ptr = z1;
				449	*z0Ptr = z0;
				450
				451	}
				452
				453	/*
				454	-------------------------------------------------------------------------------
				455	Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
				456	into two 64-bit pieces which are stored at the locations pointed to by
				457	`z0Ptr' and `z1Ptr'.
				458	-------------------------------------------------------------------------------
				459	*/
				460	INLINE void mul64To128( bits64 a, bits64 b, bits64 z0Ptr, bits64 z1Ptr )
				461	{
				462	bits32 aHigh, aLow, bHigh, bLow;
				463	bits64 z0, zMiddleA, zMiddleB, z1;
				464
				465	aLow = a;
				466	aHigh = a>>32;
				467	bLow = b;
				468	bHigh = b>>32;
				469	z1 = ( (bits64) aLow ) * bLow;
				470	zMiddleA = ( (bits64) aLow ) * bHigh;
				471	zMiddleB = ( (bits64) aHigh ) * bLow;
				472	z0 = ( (bits64) aHigh ) * bHigh;
				473	zMiddleA += zMiddleB;
				474	z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
				475	zMiddleA <<= 32;
				476	z1 += zMiddleA;
				477	z0 += ( z1 < zMiddleA );
				478	*z1Ptr = z1;
				479	*z0Ptr = z0;
				480
				481	}
				482
				483	/*
				484	-------------------------------------------------------------------------------
				485	Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
				486	obtain a 192-bit product. The product is broken into three 64-bit pieces
				487	which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
				488	`z2Ptr'.
				489	-------------------------------------------------------------------------------
				490	*/
				491	INLINE void
				492	mul128By64To192(
				493	bits64 a0,
				494	bits64 a1,
				495	bits64 b,
				496	bits64 *z0Ptr,
				497	bits64 *z1Ptr,
				498	bits64 *z2Ptr
				499	)
				500	{
				501	bits64 z0, z1, z2, more1;
				502
				503	mul64To128( a1, b, &z1, &z2 );
				504	mul64To128( a0, b, &z0, &more1 );
				505	add128( z0, more1, 0, z1, &z0, &z1 );
				506	*z2Ptr = z2;
				507	*z1Ptr = z1;
				508	*z0Ptr = z0;
				509
				510	}
				511
				512	/*
				513	-------------------------------------------------------------------------------
				514	Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
				515	128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
				516	product. The product is broken into four 64-bit pieces which are stored at
				517	the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
				518	-------------------------------------------------------------------------------
				519	*/
				520	INLINE void
				521	mul128To256(
				522	bits64 a0,
				523	bits64 a1,
				524	bits64 b0,
				525	bits64 b1,
				526	bits64 *z0Ptr,
				527	bits64 *z1Ptr,
				528	bits64 *z2Ptr,
				529	bits64 *z3Ptr
				530	)
				531	{
				532	bits64 z0, z1, z2, z3;
				533	bits64 more1, more2;
				534
				535	mul64To128( a1, b1, &z2, &z3 );
				536	mul64To128( a1, b0, &z1, &more2 );
				537	add128( z1, more2, 0, z2, &z1, &z2 );
				538	mul64To128( a0, b0, &z0, &more1 );
				539	add128( z0, more1, 0, z1, &z0, &z1 );
				540	mul64To128( a0, b1, &more1, &more2 );
				541	add128( more1, more2, 0, z2, &more1, &z2 );
				542	add128( z0, z1, 0, more1, &z0, &z1 );
				543	*z3Ptr = z3;
				544	*z2Ptr = z2;
				545	*z1Ptr = z1;
				546	*z0Ptr = z0;
				547
				548	}
				549
				550	/*
				551	-------------------------------------------------------------------------------
				552	Returns an approximation to the 64-bit integer quotient obtained by dividing
				553	`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
				554	divisor `b' must be at least 2^63. If q is the exact quotient truncated
				555	toward zero, the approximation returned lies between q and q + 2 inclusive.
				556	If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
				557	unsigned integer is returned.
				558	-------------------------------------------------------------------------------
				559	*/
				560	static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
				561	{
				562	bits64 b0, b1;
				563	bits64 rem0, rem1, term0, term1;
				564	bits64 z;
				565	if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	566	b0 = b>>32; /* hence b0 is 32 bits wide now */
				567	if ( b0<<32 <= a0 ) {
				568	z = LIT64( 0xFFFFFFFF00000000 );
				569	} else {
				570	z = a0;
				571	do_div( z, b0 );
				572	z <<= 32;
				573	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	574	mul64To128( b, z, &term0, &term1 );
				575	sub128( a0, a1, term0, term1, &rem0, &rem1 );
				576	while ( ( (sbits64) rem0 ) < 0 ) {
				577	z -= LIT64( 0x100000000 );
				578	b1 = b<<32;
				579	add128( rem0, rem1, b0, b1, &rem0, &rem1 );
				580	}
				581	rem0 = ( rem0<<32 ) \| ( rem1>>32 );
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	582	if ( b0<<32 <= rem0 ) {
				583	z \|= 0xFFFFFFFF;
				584	} else {
				585	do_div( rem0, b0 );
				586	z \|= rem0;
				587	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	588	return z;
				589
				590	}
				591
				592	/*
				593	-------------------------------------------------------------------------------
				594	Returns an approximation to the square root of the 32-bit significand given
				595	by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
				596	`aExp' (the least significant bit) is 1, the integer returned approximates
				597	2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
				598	is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
				599	case, the approximation returned lies strictly within +/-2 of the exact
				600	value.
				601	-------------------------------------------------------------------------------
				602	*/
				603	static bits32 estimateSqrt32( int16 aExp, bits32 a )
				604	{
				605	static const bits16 sqrtOddAdjustments[] = {
				606	0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
				607	0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
				608	};
				609	static const bits16 sqrtEvenAdjustments[] = {
				610	0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
				611	0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
				612	};
				613	int8 index;
				614	bits32 z;
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	615	bits64 A;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616
				617	index = ( a>>27 ) & 15;
				618	if ( aExp & 1 ) {
				619	z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
				620	z = ( ( a / z )<<14 ) + ( z<<15 );
				621	a >>= 1;
				622	}
				623	else {
				624	z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
				625	z = a / z + z;
				626	z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
				627	if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
				628	}
Nicolas Pitre	c1241c4c	2005-06-23 21:56:46 +0100	[diff] [blame]	629	A = ( (bits64) a )<<31;
				630	do_div( A, z );
				631	return ( (bits32) A ) + ( z>>1 );
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	632
				633	}
				634
				635	/*
				636	-------------------------------------------------------------------------------
				637	Returns the number of leading 0 bits before the most-significant 1 bit
				638	of `a'. If `a' is zero, 32 is returned.
				639	-------------------------------------------------------------------------------
				640	*/
				641	static int8 countLeadingZeros32( bits32 a )
				642	{
				643	static const int8 countLeadingZerosHigh[] = {
				644	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
				645	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
				646	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				647	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
				648	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				649	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				650	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				651	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
				652	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				653	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				654	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				655	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				656	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				657	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				658	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				659	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
				660	};
				661	int8 shiftCount;
				662
				663	shiftCount = 0;
				664	if ( a < 0x10000 ) {
				665	shiftCount += 16;
				666	a <<= 16;
				667	}
				668	if ( a < 0x1000000 ) {
				669	shiftCount += 8;
				670	a <<= 8;
				671	}
				672	shiftCount += countLeadingZerosHigh[ a>>24 ];
				673	return shiftCount;
				674
				675	}
				676
				677	/*
				678	-------------------------------------------------------------------------------
				679	Returns the number of leading 0 bits before the most-significant 1 bit
				680	of `a'. If `a' is zero, 64 is returned.
				681	-------------------------------------------------------------------------------
				682	*/
				683	static int8 countLeadingZeros64( bits64 a )
				684	{
				685	int8 shiftCount;
				686
				687	shiftCount = 0;
				688	if ( a < ( (bits64) 1 )<<32 ) {
				689	shiftCount += 32;
				690	}
				691	else {
				692	a >>= 32;
				693	}
				694	shiftCount += countLeadingZeros32( a );
				695	return shiftCount;
				696
				697	}
				698
				699	/*
				700	-------------------------------------------------------------------------------
				701	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
				702	is equal to the 128-bit value formed by concatenating `b0' and `b1'.
				703	Otherwise, returns 0.
				704	-------------------------------------------------------------------------------
				705	*/
				706	INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
				707	{
				708
				709	return ( a0 == b0 ) && ( a1 == b1 );
				710
				711	}
				712
				713	/*
				714	-------------------------------------------------------------------------------
				715	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
				716	than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
				717	Otherwise, returns 0.
				718	-------------------------------------------------------------------------------
				719	*/
				720	INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
				721	{
				722
				723	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 <= b1 ) );
				724
				725	}
				726
				727	/*
				728	-------------------------------------------------------------------------------
				729	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
				730	than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
				731	returns 0.
				732	-------------------------------------------------------------------------------
				733	*/
				734	INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
				735	{
				736
				737	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 < b1 ) );
				738
				739	}
				740
				741	/*
				742	-------------------------------------------------------------------------------
				743	Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
				744	not equal to the 128-bit value formed by concatenating `b0' and `b1'.
				745	Otherwise, returns 0.
				746	-------------------------------------------------------------------------------
				747	*/
				748	INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
				749	{
				750
				751	return ( a0 != b0 ) \|\| ( a1 != b1 );
				752
				753	}
				754