Blame - arch/x86/math-emu/wm_sqrt.S - kernel/msm

blob: d258f59564e11b71a37148873bbf61b41d3c4b82 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	.file "wm_sqrt.S"
				2	/*---------------------------------------------------------------------------+
				3	\| wm_sqrt.S \|
				4	\| \|
				5	\| Fixed point arithmetic square root evaluation. \|
				6	\| \|
				7	\| Copyright (C) 1992,1993,1995,1997 \|
				8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
				9	\| Australia. E-mail billm@suburbia.net \|
				10	\| \|
				11	\| Call from C as: \|
				12	\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|
				13	\| \|
				14	+---------------------------------------------------------------------------*/
				15
				16	/*---------------------------------------------------------------------------+
				17	\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|
				18	\| returns the square root of n in n. \|
				19	\| \|
				20	\| Use Newton's method to compute the square root of a number, which must \|
				21	\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|
				22	\| Does not check the sign or tag of the argument. \|
				23	\| Sets the exponent, but not the sign or tag of the result. \|
				24	\| \|
				25	\| The guess is kept in %esi:%edi \|
				26	+---------------------------------------------------------------------------*/
				27
				28	#include "exception.h"
				29	#include "fpu_emu.h"
				30
				31
				32	#ifndef NON_REENTRANT_FPU
				33	/* Local storage on the stack: */
				34	#define FPU_accum_3 -4(%ebp) /* ms word */
				35	#define FPU_accum_2 -8(%ebp)
				36	#define FPU_accum_1 -12(%ebp)
				37	#define FPU_accum_0 -16(%ebp)
				38
				39	/*
				40	* The de-normalised argument:
				41	* sq_2 sq_1 sq_0
				42	* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
				43	* ^ binary point here
				44	*/
				45	#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
				46	#define FPU_fsqrt_arg_1 -24(%ebp)
				47	#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
				48
				49	#else
				50	/* Local storage in a static area: */
				51	.data
				52	.align 4,0
				53	FPU_accum_3:
				54	.long 0 /* ms word */
				55	FPU_accum_2:
				56	.long 0
				57	FPU_accum_1:
				58	.long 0
				59	FPU_accum_0:
				60	.long 0
				61
				62	/* The de-normalised argument:
				63	sq_2 sq_1 sq_0
				64	b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
				65	^ binary point here
				66	*/
				67	FPU_fsqrt_arg_2:
				68	.long 0 /* ms word */
				69	FPU_fsqrt_arg_1:
				70	.long 0
				71	FPU_fsqrt_arg_0:
				72	.long 0 /* ls word, at most the ms bit is set */
				73	#endif /* NON_REENTRANT_FPU */
				74
				75
				76	.text
				77	ENTRY(wm_sqrt)
				78	pushl %ebp
				79	movl %esp,%ebp
				80	#ifndef NON_REENTRANT_FPU
				81	subl $28,%esp
				82	#endif /* NON_REENTRANT_FPU */
				83	pushl %esi
				84	pushl %edi
				85	pushl %ebx
				86
				87	movl PARAM1,%esi
				88
				89	movl SIGH(%esi),%eax
				90	movl SIGL(%esi),%ecx
				91	xorl %edx,%edx
				92
				93	/* We use a rough linear estimate for the first guess.. */
				94
				95	cmpw EXP_BIAS,EXP(%esi)
				96	jnz sqrt_arg_ge_2
				97
				98	shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
				99	rcrl $1,%ecx
				100	rcrl $1,%edx
				101
				102	sqrt_arg_ge_2:
				103	/* From here on, n is never accessed directly again until it is
				104	replaced by the answer. */
				105
				106	movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
				107	movl %ecx,FPU_fsqrt_arg_1
				108	movl %edx,FPU_fsqrt_arg_0
				109
				110	/* Make a linear first estimate */
				111	shrl $1,%eax
				112	addl $0x40000000,%eax
				113	movl $0xaaaaaaaa,%ecx
				114	mull %ecx
				115	shll %edx /* max result was 7fff... */
				116	testl $0x80000000,%edx /* but min was 3fff... */
				117	jnz sqrt_prelim_no_adjust
				118
				119	movl $0x80000000,%edx /* round up */
				120
				121	sqrt_prelim_no_adjust:
				122	movl %edx,%esi /* Our first guess */
				123
				124	/* We have now computed (approx) (2 + x) / 3, which forms the basis
				125	for a few iterations of Newton's method */
				126
				127	movl FPU_fsqrt_arg_2,%ecx /* ms word */
				128
				129	/*
				130	* From our initial estimate, three iterations are enough to get us
				131	* to 30 bits or so. This will then allow two iterations at better
				132	* precision to complete the process.
				133	*/
				134
				135	/* Compute (g + n/g)/2 at each iteration (g is the guess). */
				136	shrl %ecx /* Doing this first will prevent a divide */
				137	/* overflow later. */
				138
				139	movl %ecx,%edx /* msw of the arg / 2 */
				140	divl %esi /* current estimate */
				141	shrl %esi /* divide by 2 */
				142	addl %eax,%esi /* the new estimate */
				143
				144	movl %ecx,%edx
				145	divl %esi
				146	shrl %esi
				147	addl %eax,%esi
				148
				149	movl %ecx,%edx
				150	divl %esi
				151	shrl %esi
				152	addl %eax,%esi
				153
				154	/*
				155	* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
				156	* we improve it to 60 bits or so.
				157	*
				158	* The strategy from now on is to compute new estimates from
				159	* guess := guess + (n - guess^2) / (2 * guess)
				160	*/
				161
				162	/* First, find the square of the guess */
				163	movl %esi,%eax
				164	mull %esi
				165	/* guess^2 now in %edx:%eax */
				166
				167	movl FPU_fsqrt_arg_1,%ecx
				168	subl %ecx,%eax
				169	movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
				170	sbbl %ecx,%edx
				171	jnc sqrt_stage_2_positive
				172
				173	/* Subtraction gives a negative result,
				174	negate the result before division. */
				175	notl %edx
				176	notl %eax
				177	addl $1,%eax
				178	adcl $0,%edx
				179
				180	divl %esi
				181	movl %eax,%ecx
				182
				183	movl %edx,%eax
				184	divl %esi
				185	jmp sqrt_stage_2_finish
				186
				187	sqrt_stage_2_positive:
				188	divl %esi
				189	movl %eax,%ecx
				190
				191	movl %edx,%eax
				192	divl %esi
				193
				194	notl %ecx
				195	notl %eax
				196	addl $1,%eax
				197	adcl $0,%ecx
				198
				199	sqrt_stage_2_finish:
				200	sarl $1,%ecx /* divide by 2 */
				201	rcrl $1,%eax
				202
				203	/* Form the new estimate in %esi:%edi */
				204	movl %eax,%edi
				205	addl %ecx,%esi
				206
				207	jnz sqrt_stage_2_done /* result should be [1..2) */
				208
				209	#ifdef PARANOID
				210	/* It should be possible to get here only if the arg is ffff....ffff */
				211	cmp $0xffffffff,FPU_fsqrt_arg_1
				212	jnz sqrt_stage_2_error
				213	#endif /* PARANOID */
				214
				215	/* The best rounded result. */
				216	xorl %eax,%eax
				217	decl %eax
				218	movl %eax,%edi
				219	movl %eax,%esi
				220	movl $0x7fffffff,%eax
				221	jmp sqrt_round_result
				222
				223	#ifdef PARANOID
				224	sqrt_stage_2_error:
				225	pushl EX_INTERNAL\|0x213
				226	call EXCEPTION
				227	#endif /* PARANOID */
				228
				229	sqrt_stage_2_done:
				230
				231	/* Now the square root has been computed to better than 60 bits. */
				232
				233	/* Find the square of the guess. */
				234	movl %edi,%eax /* ls word of guess */
				235	mull %edi
				236	movl %edx,FPU_accum_1
				237
				238	movl %esi,%eax
				239	mull %esi
				240	movl %edx,FPU_accum_3
				241	movl %eax,FPU_accum_2
				242
				243	movl %edi,%eax
				244	mull %esi
				245	addl %eax,FPU_accum_1
				246	adcl %edx,FPU_accum_2
				247	adcl $0,FPU_accum_3
				248
				249	/* movl %esi,%eax */
				250	/* mull %edi */
				251	addl %eax,FPU_accum_1
				252	adcl %edx,FPU_accum_2
				253	adcl $0,FPU_accum_3
				254
				255	/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
				256
				257	movl FPU_fsqrt_arg_0,%eax /* get normalized n */
				258	subl %eax,FPU_accum_1
				259	movl FPU_fsqrt_arg_1,%eax
				260	sbbl %eax,FPU_accum_2
				261	movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
				262	sbbl %eax,FPU_accum_3
				263	jnc sqrt_stage_3_positive
				264
				265	/* Subtraction gives a negative result,
				266	negate the result before division */
				267	notl FPU_accum_1
				268	notl FPU_accum_2
				269	notl FPU_accum_3
				270	addl $1,FPU_accum_1
				271	adcl $0,FPU_accum_2
				272
				273	#ifdef PARANOID
				274	adcl $0,FPU_accum_3 /* This must be zero */
				275	jz sqrt_stage_3_no_error
				276
				277	sqrt_stage_3_error:
				278	pushl EX_INTERNAL\|0x207
				279	call EXCEPTION
				280
				281	sqrt_stage_3_no_error:
				282	#endif /* PARANOID */
				283
				284	movl FPU_accum_2,%edx
				285	movl FPU_accum_1,%eax
				286	divl %esi
				287	movl %eax,%ecx
				288
				289	movl %edx,%eax
				290	divl %esi
				291
				292	sarl $1,%ecx /* divide by 2 */
				293	rcrl $1,%eax
				294
				295	/* prepare to round the result */
				296
				297	addl %ecx,%edi
				298	adcl $0,%esi
				299
				300	jmp sqrt_stage_3_finished
				301
				302	sqrt_stage_3_positive:
				303	movl FPU_accum_2,%edx
				304	movl FPU_accum_1,%eax
				305	divl %esi
				306	movl %eax,%ecx
				307
				308	movl %edx,%eax
				309	divl %esi
				310
				311	sarl $1,%ecx /* divide by 2 */
				312	rcrl $1,%eax
				313
				314	/* prepare to round the result */
				315
				316	notl %eax /* Negate the correction term */
				317	notl %ecx
				318	addl $1,%eax
				319	adcl $0,%ecx /* carry here ==> correction == 0 */
				320	adcl $0xffffffff,%esi
				321
				322	addl %ecx,%edi
				323	adcl $0,%esi
				324
				325	sqrt_stage_3_finished:
				326
				327	/*
				328	* The result in %esi:%edi:%esi should be good to about 90 bits here,
				329	* and the rounding information here does not have sufficient accuracy
				330	* in a few rare cases.
				331	*/
				332	cmpl $0xffffffe0,%eax
				333	ja sqrt_near_exact_x
				334
				335	cmpl $0x00000020,%eax
				336	jb sqrt_near_exact
				337
				338	cmpl $0x7fffffe0,%eax
				339	jb sqrt_round_result
				340
				341	cmpl $0x80000020,%eax
				342	jb sqrt_get_more_precision
				343
				344	sqrt_round_result:
				345	/* Set up for rounding operations */
				346	movl %eax,%edx
				347	movl %esi,%eax
				348	movl %edi,%ebx
				349	movl PARAM1,%edi
				350	movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
				351	jmp fpu_reg_round
				352
				353
				354	sqrt_near_exact_x:
				355	/* First, the estimate must be rounded up. */
				356	addl $1,%edi
				357	adcl $0,%esi
				358
				359	sqrt_near_exact:
				360	/*
				361	* This is an easy case because x^1/2 is monotonic.
				362	* We need just find the square of our estimate, compare it
				363	* with the argument, and deduce whether our estimate is
				364	* above, below, or exact. We use the fact that the estimate
				365	* is known to be accurate to about 90 bits.
				366	*/
				367	movl %edi,%eax /* ls word of guess */
				368	mull %edi
				369	movl %edx,%ebx /* 2nd ls word of square */
				370	movl %eax,%ecx /* ls word of square */
				371
				372	movl %edi,%eax
				373	mull %esi
				374	addl %eax,%ebx
				375	addl %eax,%ebx
				376
				377	#ifdef PARANOID
				378	cmp $0xffffffb0,%ebx
				379	jb sqrt_near_exact_ok
				380
				381	cmp $0x00000050,%ebx
				382	ja sqrt_near_exact_ok
				383
				384	pushl EX_INTERNAL\|0x214
				385	call EXCEPTION
				386
				387	sqrt_near_exact_ok:
				388	#endif /* PARANOID */
				389
				390	or %ebx,%ebx
				391	js sqrt_near_exact_small
				392
				393	jnz sqrt_near_exact_large
				394
				395	or %ebx,%edx
				396	jnz sqrt_near_exact_large
				397
				398	/* Our estimate is exactly the right answer */
				399	xorl %eax,%eax
				400	jmp sqrt_round_result
				401
				402	sqrt_near_exact_small:
				403	/* Our estimate is too small */
				404	movl $0x000000ff,%eax
				405	jmp sqrt_round_result
				406
				407	sqrt_near_exact_large:
				408	/* Our estimate is too large, we need to decrement it */
				409	subl $1,%edi
				410	sbbl $0,%esi
				411	movl $0xffffff00,%eax
				412	jmp sqrt_round_result
				413
				414
				415	sqrt_get_more_precision:
				416	/* This case is almost the same as the above, except we start
				417	with an extra bit of precision in the estimate. */
				418	stc /* The extra bit. */
				419	rcll $1,%edi /* Shift the estimate left one bit */
				420	rcll $1,%esi
				421
				422	movl %edi,%eax /* ls word of guess */
				423	mull %edi
				424	movl %edx,%ebx /* 2nd ls word of square */
				425	movl %eax,%ecx /* ls word of square */
				426
				427	movl %edi,%eax
				428	mull %esi
				429	addl %eax,%ebx
				430	addl %eax,%ebx
				431
				432	/* Put our estimate back to its original value */
				433	stc /* The ms bit. */
				434	rcrl $1,%esi /* Shift the estimate left one bit */
				435	rcrl $1,%edi
				436
				437	#ifdef PARANOID
				438	cmp $0xffffff60,%ebx
				439	jb sqrt_more_prec_ok
				440
				441	cmp $0x000000a0,%ebx
				442	ja sqrt_more_prec_ok
				443
				444	pushl EX_INTERNAL\|0x215
				445	call EXCEPTION
				446
				447	sqrt_more_prec_ok:
				448	#endif /* PARANOID */
				449
				450	or %ebx,%ebx
				451	js sqrt_more_prec_small
				452
				453	jnz sqrt_more_prec_large
				454
				455	or %ebx,%ecx
				456	jnz sqrt_more_prec_large
				457
				458	/* Our estimate is exactly the right answer */
				459	movl $0x80000000,%eax
				460	jmp sqrt_round_result
				461
				462	sqrt_more_prec_small:
				463	/* Our estimate is too small */
				464	movl $0x800000ff,%eax
				465	jmp sqrt_round_result
				466
				467	sqrt_more_prec_large:
				468	/* Our estimate is too large */
				469	movl $0x7fffff00,%eax
				470	jmp sqrt_round_result