Blame - arch/x86/math-emu/div_Xsig.S - kernel/msm

blob: f77ba3058b31748f898dfbadc7ad8838dbceb8d0 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	.file "div_Xsig.S"
				2	/*---------------------------------------------------------------------------+
				3	\| div_Xsig.S \|
				4	\| \|
				5	\| Division subroutine for 96 bit quantities \|
				6	\| \|
				7	\| Copyright (C) 1994,1995 \|
				8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
				9	\| Australia. E-mail billm@jacobi.maths.monash.edu.au \|
				10	\| \|
				11	\| \|
				12	+---------------------------------------------------------------------------*/
				13
				14	/*---------------------------------------------------------------------------+
				15	\| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and \|
				16	\| put the 96 bit result at the location d. \|
				17	\| \|
				18	\| The result may not be accurate to 96 bits. It is intended for use where \|
				19	\| a result better than 64 bits is required. The result should usually be \|
				20	\| good to at least 94 bits. \|
				21	\| The returned result is actually divided by one half. This is done to \|
				22	\| prevent overflow. \|
				23	\| \|
				24	\| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd \|
				25	\| \|
				26	\| void div_Xsig(Xsig a, Xsig b, Xsig *dest) \|
				27	\| \|
				28	+---------------------------------------------------------------------------*/
				29
				30	#include "exception.h"
				31	#include "fpu_emu.h"
				32
				33
				34	#define XsigLL(x) (x)
				35	#define XsigL(x) 4(x)
				36	#define XsigH(x) 8(x)
				37
				38
				39	#ifndef NON_REENTRANT_FPU
				40	/*
				41	Local storage on the stack:
				42	Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
				43	*/
				44	#define FPU_accum_3 -4(%ebp)
				45	#define FPU_accum_2 -8(%ebp)
				46	#define FPU_accum_1 -12(%ebp)
				47	#define FPU_accum_0 -16(%ebp)
				48	#define FPU_result_3 -20(%ebp)
				49	#define FPU_result_2 -24(%ebp)
				50	#define FPU_result_1 -28(%ebp)
				51
				52	#else
				53	.data
				54	/*
				55	Local storage in a static area:
				56	Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
				57	*/
				58	.align 4,0
				59	FPU_accum_3:
				60	.long 0
				61	FPU_accum_2:
				62	.long 0
				63	FPU_accum_1:
				64	.long 0
				65	FPU_accum_0:
				66	.long 0
				67	FPU_result_3:
				68	.long 0
				69	FPU_result_2:
				70	.long 0
				71	FPU_result_1:
				72	.long 0
				73	#endif /* NON_REENTRANT_FPU */
				74
				75
				76	.text
				77	ENTRY(div_Xsig)
				78	pushl %ebp
				79	movl %esp,%ebp
				80	#ifndef NON_REENTRANT_FPU
				81	subl $28,%esp
				82	#endif /* NON_REENTRANT_FPU */
				83
				84	pushl %esi
				85	pushl %edi
				86	pushl %ebx
				87
				88	movl PARAM1,%esi /* pointer to num */
				89	movl PARAM2,%ebx /* pointer to denom */
				90
				91	#ifdef PARANOID
				92	testl $0x80000000, XsigH(%ebx) /* Divisor */
				93	je L_bugged
				94	#endif /* PARANOID */
				95
				96
				97	/*---------------------------------------------------------------------------+
				98	\| Divide: Return arg1/arg2 to arg3. \|
				99	\| \|
				100	\| The maximum returned value is (ignoring exponents) \|
				101	\| .ffffffff ffffffff \|
				102	\| ------------------ = 1.ffffffff fffffffe \|
				103	\| .80000000 00000000 \|
				104	\| and the minimum is \|
				105	\| .80000000 00000000 \|
				106	\| ------------------ = .80000000 00000001 (rounded) \|
				107	\| .ffffffff ffffffff \|
				108	\| \|
				109	+---------------------------------------------------------------------------*/
				110
				111	/* Save extended dividend in local register */
				112
				113	/* Divide by 2 to prevent overflow */
				114	clc
				115	movl XsigH(%esi),%eax
				116	rcrl %eax
				117	movl %eax,FPU_accum_3
				118	movl XsigL(%esi),%eax
				119	rcrl %eax
				120	movl %eax,FPU_accum_2
				121	movl XsigLL(%esi),%eax
				122	rcrl %eax
				123	movl %eax,FPU_accum_1
				124	movl $0,%eax
				125	rcrl %eax
				126	movl %eax,FPU_accum_0
				127
				128	movl FPU_accum_2,%eax /* Get the current num */
				129	movl FPU_accum_3,%edx
				130
				131	/----------------------------------------------------------------------/
				132	/* Initialization done.
				133	Do the first 32 bits. */
				134
				135	/* We will divide by a number which is too large */
				136	movl XsigH(%ebx),%ecx
				137	addl $1,%ecx
				138	jnc LFirst_div_not_1
				139
				140	/* here we need to divide by 100000000h,
				141	i.e., no division at all.. */
				142	mov %edx,%eax
				143	jmp LFirst_div_done
				144
				145	LFirst_div_not_1:
				146	divl %ecx /* Divide the numerator by the augmented
				147	denom ms dw */
				148
				149	LFirst_div_done:
				150	movl %eax,FPU_result_3 /* Put the result in the answer */
				151
				152	mull XsigH(%ebx) /* mul by the ms dw of the denom */
				153
				154	subl %eax,FPU_accum_2 /* Subtract from the num local reg */
				155	sbbl %edx,FPU_accum_3
				156
				157	movl FPU_result_3,%eax /* Get the result back */
				158	mull XsigL(%ebx) /* now mul the ls dw of the denom */
				159
				160	subl %eax,FPU_accum_1 /* Subtract from the num local reg */
				161	sbbl %edx,FPU_accum_2
				162	sbbl $0,FPU_accum_3
				163	je LDo_2nd_32_bits /* Must check for non-zero result here */
				164
				165	#ifdef PARANOID
				166	jb L_bugged_1
				167	#endif /* PARANOID */
				168
				169	/* need to subtract another once of the denom */
				170	incl FPU_result_3 /* Correct the answer */
				171
				172	movl XsigL(%ebx),%eax
				173	movl XsigH(%ebx),%edx
				174	subl %eax,FPU_accum_1 /* Subtract from the num local reg */
				175	sbbl %edx,FPU_accum_2
				176
				177	#ifdef PARANOID
				178	sbbl $0,FPU_accum_3
				179	jne L_bugged_1 /* Must check for non-zero result here */
				180	#endif /* PARANOID */
				181
				182	/----------------------------------------------------------------------/
				183	/* Half of the main problem is done, there is just a reduced numerator
				184	to handle now.
				185	Work with the second 32 bits, FPU_accum_0 not used from now on */
				186	LDo_2nd_32_bits:
				187	movl FPU_accum_2,%edx /* get the reduced num */
				188	movl FPU_accum_1,%eax
				189
				190	/* need to check for possible subsequent overflow */
				191	cmpl XsigH(%ebx),%edx
				192	jb LDo_2nd_div
				193	ja LPrevent_2nd_overflow
				194
				195	cmpl XsigL(%ebx),%eax
				196	jb LDo_2nd_div
				197
				198	LPrevent_2nd_overflow:
				199	/* The numerator is greater or equal, would cause overflow */
				200	/* prevent overflow */
				201	subl XsigL(%ebx),%eax
				202	sbbl XsigH(%ebx),%edx
				203	movl %edx,FPU_accum_2
				204	movl %eax,FPU_accum_1
				205
				206	incl FPU_result_3 /* Reflect the subtraction in the answer */
				207
				208	#ifdef PARANOID
				209	je L_bugged_2 /* Can't bump the result to 1.0 */
				210	#endif /* PARANOID */
				211
				212	LDo_2nd_div:
				213	cmpl $0,%ecx /* augmented denom msw */
				214	jnz LSecond_div_not_1
				215
				216	/* %ecx == 0, we are dividing by 1.0 */
				217	mov %edx,%eax
				218	jmp LSecond_div_done
				219
				220	LSecond_div_not_1:
				221	divl %ecx /* Divide the numerator by the denom ms dw */
				222
				223	LSecond_div_done:
				224	movl %eax,FPU_result_2 /* Put the result in the answer */
				225
				226	mull XsigH(%ebx) /* mul by the ms dw of the denom */
				227
				228	subl %eax,FPU_accum_1 /* Subtract from the num local reg */
				229	sbbl %edx,FPU_accum_2
				230
				231	#ifdef PARANOID
				232	jc L_bugged_2
				233	#endif /* PARANOID */
				234
				235	movl FPU_result_2,%eax /* Get the result back */
				236	mull XsigL(%ebx) /* now mul the ls dw of the denom */
				237
				238	subl %eax,FPU_accum_0 /* Subtract from the num local reg */
				239	sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
				240	sbbl $0,FPU_accum_2
				241
				242	#ifdef PARANOID
				243	jc L_bugged_2
				244	#endif /* PARANOID */
				245
				246	jz LDo_3rd_32_bits
				247
				248	#ifdef PARANOID
				249	cmpl $1,FPU_accum_2
				250	jne L_bugged_2
				251	#endif /* PARANOID */
				252
				253	/* need to subtract another once of the denom */
				254	movl XsigL(%ebx),%eax
				255	movl XsigH(%ebx),%edx
				256	subl %eax,FPU_accum_0 /* Subtract from the num local reg */
				257	sbbl %edx,FPU_accum_1
				258	sbbl $0,FPU_accum_2
				259
				260	#ifdef PARANOID
				261	jc L_bugged_2
				262	jne L_bugged_2
				263	#endif /* PARANOID */
				264
				265	addl $1,FPU_result_2 /* Correct the answer */
				266	adcl $0,FPU_result_3
				267
				268	#ifdef PARANOID
				269	jc L_bugged_2 /* Must check for non-zero result here */
				270	#endif /* PARANOID */
				271
				272	/----------------------------------------------------------------------/
				273	/* The division is essentially finished here, we just need to perform
				274	tidying operations.
				275	Deal with the 3rd 32 bits */
				276	LDo_3rd_32_bits:
				277	/* We use an approximation for the third 32 bits.
				278	To take account of the 3rd 32 bits of the divisor
				279	(call them del), we subtract del * (a/b) */
				280
				281	movl FPU_result_3,%eax /* a/b */
				282	mull XsigLL(%ebx) /* del */
				283
				284	subl %edx,FPU_accum_1
				285
				286	/* A borrow indicates that the result is negative */
				287	jnb LTest_over
				288
				289	movl XsigH(%ebx),%edx
				290	addl %edx,FPU_accum_1
				291
				292	subl $1,FPU_result_2 /* Adjust the answer */
				293	sbbl $0,FPU_result_3
				294
				295	/* The above addition might not have been enough, check again. */
				296	movl FPU_accum_1,%edx /* get the reduced num */
				297	cmpl XsigH(%ebx),%edx /* denom */
				298	jb LDo_3rd_div
				299
				300	movl XsigH(%ebx),%edx
				301	addl %edx,FPU_accum_1
				302
				303	subl $1,FPU_result_2 /* Adjust the answer */
				304	sbbl $0,FPU_result_3
				305	jmp LDo_3rd_div
				306
				307	LTest_over:
				308	movl FPU_accum_1,%edx /* get the reduced num */
				309
				310	/* need to check for possible subsequent overflow */
				311	cmpl XsigH(%ebx),%edx /* denom */
				312	jb LDo_3rd_div
				313
				314	/* prevent overflow */
				315	subl XsigH(%ebx),%edx
				316	movl %edx,FPU_accum_1
				317
				318	addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
				319	adcl $0,FPU_result_3
				320
				321	LDo_3rd_div:
				322	movl FPU_accum_0,%eax
				323	movl FPU_accum_1,%edx
				324	divl XsigH(%ebx)
				325
				326	movl %eax,FPU_result_1 /* Rough estimate of third word */
				327
				328	movl PARAM3,%esi /* pointer to answer */
				329
				330	movl FPU_result_1,%eax
				331	movl %eax,XsigLL(%esi)
				332	movl FPU_result_2,%eax
				333	movl %eax,XsigL(%esi)
				334	movl FPU_result_3,%eax
				335	movl %eax,XsigH(%esi)
				336
				337	L_exit:
				338	popl %ebx
				339	popl %edi
				340	popl %esi
				341
				342	leave
				343	ret
				344
				345
				346	#ifdef PARANOID
				347	/* The logic is wrong if we got here */
				348	L_bugged:
				349	pushl EX_INTERNAL\|0x240
				350	call EXCEPTION
				351	pop %ebx
				352	jmp L_exit
				353
				354	L_bugged_1:
				355	pushl EX_INTERNAL\|0x241
				356	call EXCEPTION
				357	pop %ebx
				358	jmp L_exit
				359
				360	L_bugged_2:
				361	pushl EX_INTERNAL\|0x242
				362	call EXCEPTION
				363	pop %ebx
				364	jmp L_exit
				365	#endif /* PARANOID */