Blame - arch/sparc/lib/udiv.S - kernel/msm

blob: 169e01da671574a38d58b98a5f82317a9fee9a56 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/* $Id: udiv.S,v 1.4 1996/09/30 02:22:38 davem Exp $
				2	* udiv.S: This routine was taken from glibc-1.09 and is covered
				3	* by the GNU Library General Public License Version 2.
				4	*/
				5
				6
				7	/* This file is generated from divrem.m4; DO NOT EDIT! */
				8	/*
				9	* Division and remainder, from Appendix E of the Sparc Version 8
				10	* Architecture Manual, with fixes from Gordon Irlam.
				11	*/
				12
				13	/*
				14	* Input: dividend and divisor in %o0 and %o1 respectively.
				15	*
				16	* m4 parameters:
				17	* NAME name of function to generate (here: .udiv)
				18	* OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: div)
				19	* S S=true => signed; S=false => unsigned (here: false)
				20	*
				21	* Algorithm parameters:
				22	* N how many bits per iteration we try to get (4)
				23	* WORDSIZE total number of bits (32)
				24	*
				25	* Derived constants:
				26	* TOPBITS number of bits in the top decade of a number
				27	*
				28	* Important variables:
				29	* Q the partial quotient under development (initially 0)
				30	* R the remainder so far, initially the dividend
				31	* ITER number of main division loop iterations required;
				32	* equal to ceil(log2(quotient) / N). Note that this
				33	* is the log base (2^N) of the quotient.
				34	* V the current comparand, initially divisor*2^(ITER*N-1)
				35	*
				36	* Cost:
				37	* Current estimate for non-large dividend is
				38	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
				39	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
				40	* different path, as the upper bits of the quotient must be developed
				41	* one bit at a time.
				42	*/
				43
				44
				45	.globl .udiv
Al Viro	7caaeab	2005-09-11 20:14:07 -0700	[diff] [blame]	46	.globl _Udiv
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	47	.udiv:
Al Viro	7caaeab	2005-09-11 20:14:07 -0700	[diff] [blame]	48	_Udiv: /* needed for export */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	49
					/*
					 * .udiv / _Udiv: software unsigned 32-bit divide.
					 * In:  %o0 = dividend, %o1 = divisor.
					 * Out: %o0 = quotient (0 if the divide-by-zero trap returns).
					 * May write: %o0, %o2, %o3, %o4, %o5, %g1, %g7 and the
					 * integer condition codes.
					 * Working registers:
					 *   %o2 = Q, the partial quotient
					 *   %o3 = R, the running remainder (allowed to go negative)
					 *   %o4 = count of 4-bit iterations
					 *   %o5 = V, the scaled divisor (comparand)
					 *   %g1 = the constant 1 << 27 used as the size threshold
					 *   %g7 = count of single-bit steps (large-dividend path only)
					 * The instruction following a branch sits in its delay slot and
					 * is executed before the branch takes effect; several delay
					 * slots below do real work, so instruction order matters.
					 */
				50	! Ready to divide. Compute size of quotient; scale comparand.
				51	orcc %o1, %g0, %o5 /* V = divisor; Z is set if the divisor is zero */
				52	bne 1f /* non-zero divisor: skip the trap */
				53	mov %o0, %o3 /* delay slot: R = dividend */
				54
				55	! Divide by zero trap. If it returns, return 0 (about as
				56	! wrong as possible, but that is what SunOS does...).
				57	ta ST_DIV0 /* ST_DIV0 is defined outside the lines shown here */
				58	retl
				59	clr %o0 /* delay slot: quotient = 0 */
				60
				61	1:
				62	cmp %o3, %o5 ! if %o1 exceeds %o0, done
				63	blu Lgot_result ! (and algorithm fails otherwise)
				64	clr %o2 /* delay slot: Q = 0, which is also the result when dividend < divisor */
				65
				66	sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27 (low 10 bits are zero, so sethi alone suffices) */
				67
				68	cmp %o3, %g1
				69	blu Lnot_really_big /* dividend below 1 << 27: take the 4-bits-per-pass path */
				70	clr %o4 /* delay slot: iteration count = 0 */
				71
				72	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
				73	! as our usual N-at-a-shot divide step will cause overflow and havoc.
				74	! The number of bits in the result here is N*ITER+SC, where SC <= N.
				75	! Compute ITER in an unorthodox manner: know we need to shift V into
				76	! the top decade: so do not even bother to compare to R.
				77	1:
				78	cmp %o5, %g1
				79	bgeu 3f /* V >= 1 << 27: scaling by 16 is finished */
				80	mov 1, %g7 /* delay slot: single-bit step count starts at 1 */
				81
				82	sll %o5, 4, %o5 /* V <<= 4 */
				83
				84	b 1b
				85	add %o4, 1, %o4 /* delay slot: one more 4-bit iteration */
				86
				87	! Now compute %g7.
				88	2:
				89	addcc %o5, %o5, %o5 /* V <<= 1; carry is set if the top bit was shifted out */
				90	bcc Lnot_too_big
				91	add %g7, 1, %g7 /* delay slot: count the extra bit */
				92
				93	! We get here if the %o1 overflowed while shifting.
				94	! This means that %o3 has the high-order bit set.
				95	! Restore %o5 and subtract from %o3.
					/* NOTE: the register shifted above is %o5, the working copy of %o1. */
				96	sll %g1, 4, %g1 ! high order bit
				97	srl %o5, 1, %o5 ! rest of %o5
				98	add %o5, %g1, %o5
				99
				100	b Ldo_single_div
				101	sub %g7, 1, %g7 /* delay slot: undo the count for the shift that overflowed */
				102
				103	Lnot_too_big:
				104	3:
				105	cmp %o5, %o3
				106	blu 2b /* V < R: keep doubling V */
				107	nop
				108
				109	be Ldo_single_div
				110	nop
					/* V > R falls through to Ldo_single_div as well: only comments lie in between. */
				111	/* NB: these are commented out in the V8-Sparc manual as well */
				112	/* (I do not understand this) */
				113	! %o5 > %o3: went too far: back up 1 step
				114	! srl %o5, 1, %o5
				115	! dec %g7
				116	! do single-bit divide steps
				117	!
				118	! We have to be careful here. We know that %o3 >= %o5, so we can do the
				119	! first divide step without thinking. BUT, the others are conditional,
				120	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
				121	! order bit set in the first step, just falling into the regular
				122	! division loop will mess up the first time around.
				123	! So we unroll slightly...
				124	Ldo_single_div:
				125	subcc %g7, 1, %g7
				126	bl Lend_regular_divide /* no single-bit steps to do */
				127	nop
				128
				129	sub %o3, %o5, %o3 /* first step, unconditional: R -= V */
				130	mov 1, %o2 /* Q = 1 */
				131
				132	b Lend_single_divloop
				133	nop
				134	Lsingle_divloop:
				135	sll %o2, 1, %o2 /* Q <<= 1; sll does not modify the condition codes */
				136	bl 1f /* tests the sign of R as set by the tst in the loop-end delay slot */
				137	srl %o5, 1, %o5 /* delay slot: V >>= 1 */
				138	! %o3 >= 0
				139	sub %o3, %o5, %o3
				140	b 2f
				141	add %o2, 1, %o2
				142	1: ! %o3 < 0
				143	add %o3, %o5, %o3
				144	sub %o2, 1, %o2
				145	2:
				146	Lend_single_divloop:
				147	subcc %g7, 1, %g7
				148	bge Lsingle_divloop /* decided by the subcc above */
				149	tst %o3 /* delay slot: condition codes = sign of R for the next bl */
				150
				151	b,a Lend_regular_divide /* annulled unconditional branch: no delay-slot instruction runs */
				152
				153	Lnot_really_big:
				154	1:
				155	sll %o5, 4, %o5 /* V <<= 4 */
				156
				157	cmp %o5, %o3
				158	bleu 1b /* repeat while V <= R */
				159	addcc %o4, 1, %o4 /* delay slot: count the pass; sets Z for the be below */
				160
				161	be Lgot_result /* taken only if the addcc result was zero */
				162	sub %o4, 1, %o4 /* delay slot: iteration count -= 1 */
				163
				164	tst %o3 ! set up for initial iteration
				165	Ldivloop:
					/*
					 * One pass: shift Q left by 4, then take four non-restoring
					 * steps. Each step halves V in a branch delay slot, then
					 * subtracts V from R (R was >= 0) or adds V to R (R was < 0).
					 * Labels are L.<depth>.<16 + accumulated bits>; each leaf adds
					 * 2*accumulated +/- 1 to Q and jumps to 9: below.
					 */
				166	sll %o2, 4, %o2
				167	! depth 1, accumulated bits 0
				168	bl L.1.16
				169	srl %o5,1,%o5
				170	! remainder is positive
				171	subcc %o3,%o5,%o3
				172	! depth 2, accumulated bits 1
				173	bl L.2.17
				174	srl %o5,1,%o5
				175	! remainder is positive
				176	subcc %o3,%o5,%o3
				177	! depth 3, accumulated bits 3
				178	bl L.3.19
				179	srl %o5,1,%o5
				180	! remainder is positive
				181	subcc %o3,%o5,%o3
				182	! depth 4, accumulated bits 7
				183	bl L.4.23
				184	srl %o5,1,%o5
				185	! remainder is positive
				186	subcc %o3,%o5,%o3
				187	b 9f
				188	add %o2, (7*2+1), %o2
				189
				190	L.4.23:
				191	! remainder is negative
				192	addcc %o3,%o5,%o3
				193	b 9f
				194	add %o2, (7*2-1), %o2
				195
				196	L.3.19:
				197	! remainder is negative
				198	addcc %o3,%o5,%o3
				199	! depth 4, accumulated bits 5
				200	bl L.4.21
				201	srl %o5,1,%o5
				202	! remainder is positive
				203	subcc %o3,%o5,%o3
				204	b 9f
				205	add %o2, (5*2+1), %o2
				206
				207	L.4.21:
				208	! remainder is negative
				209	addcc %o3,%o5,%o3
				210	b 9f
				211	add %o2, (5*2-1), %o2
				212
				213	L.2.17:
				214	! remainder is negative
				215	addcc %o3,%o5,%o3
				216	! depth 3, accumulated bits 1
				217	bl L.3.17
				218	srl %o5,1,%o5
				219	! remainder is positive
				220	subcc %o3,%o5,%o3
				221	! depth 4, accumulated bits 3
				222	bl L.4.19
				223	srl %o5,1,%o5
				224	! remainder is positive
				225	subcc %o3,%o5,%o3
				226	b 9f
				227	add %o2, (3*2+1), %o2
				228
				229	L.4.19:
				230	! remainder is negative
				231	addcc %o3,%o5,%o3
				232	b 9f
				233	add %o2, (3*2-1), %o2
				234
				235	L.3.17:
				236	! remainder is negative
				237	addcc %o3,%o5,%o3
				238	! depth 4, accumulated bits 1
				239	bl L.4.17
				240	srl %o5,1,%o5
				241	! remainder is positive
				242	subcc %o3,%o5,%o3
				243	b 9f
				244	add %o2, (1*2+1), %o2
				245
				246	L.4.17:
				247	! remainder is negative
				248	addcc %o3,%o5,%o3
				249	b 9f
				250	add %o2, (1*2-1), %o2
				251
				252	L.1.16:
				253	! remainder is negative
				254	addcc %o3,%o5,%o3
				255	! depth 2, accumulated bits -1
				256	bl L.2.15
				257	srl %o5,1,%o5
				258	! remainder is positive
				259	subcc %o3,%o5,%o3
				260	! depth 3, accumulated bits -1
				261	bl L.3.15
				262	srl %o5,1,%o5
				263	! remainder is positive
				264	subcc %o3,%o5,%o3
				265	! depth 4, accumulated bits -1
				266	bl L.4.15
				267	srl %o5,1,%o5
				268	! remainder is positive
				269	subcc %o3,%o5,%o3
				270	b 9f
				271	add %o2, (-1*2+1), %o2
				272
				273	L.4.15:
				274	! remainder is negative
				275	addcc %o3,%o5,%o3
				276	b 9f
				277	add %o2, (-1*2-1), %o2
				278
				279	L.3.15:
				280	! remainder is negative
				281	addcc %o3,%o5,%o3
				282	! depth 4, accumulated bits -3
				283	bl L.4.13
				284	srl %o5,1,%o5
				285	! remainder is positive
				286	subcc %o3,%o5,%o3
				287	b 9f
				288	add %o2, (-3*2+1), %o2
				289
				290	L.4.13:
				291	! remainder is negative
				292	addcc %o3,%o5,%o3
				293	b 9f
				294	add %o2, (-3*2-1), %o2
				295
				296	L.2.15:
				297	! remainder is negative
				298	addcc %o3,%o5,%o3
				299	! depth 3, accumulated bits -3
				300	bl L.3.13
				301	srl %o5,1,%o5
				302	! remainder is positive
				303	subcc %o3,%o5,%o3
				304	! depth 4, accumulated bits -5
				305	bl L.4.11
				306	srl %o5,1,%o5
				307	! remainder is positive
				308	subcc %o3,%o5,%o3
				309	b 9f
				310	add %o2, (-5*2+1), %o2
				311
				312	L.4.11:
				313	! remainder is negative
				314	addcc %o3,%o5,%o3
				315	b 9f
				316	add %o2, (-5*2-1), %o2
				317
				318	L.3.13:
				319	! remainder is negative
				320	addcc %o3,%o5,%o3
				321	! depth 4, accumulated bits -7
				322	bl L.4.9
				323	srl %o5,1,%o5
				324	! remainder is positive
				325	subcc %o3,%o5,%o3
				326	b 9f
				327	add %o2, (-7*2+1), %o2
				328
				329	L.4.9:
				330	! remainder is negative
				331	addcc %o3,%o5,%o3
				332	b 9f
				333	add %o2, (-7*2-1), %o2
				334
				335	9:
				336	Lend_regular_divide:
				337	subcc %o4, 1, %o4
				338	bge Ldivloop /* more 4-bit passes remain */
				339	tst %o3 /* delay slot: condition codes = sign of R */
				340
				341	bl,a Lgot_result
				342	! non-restoring fixup here (one instruction only!)
				343	sub %o2, 1, %o2 /* annulled delay slot: runs only when the branch is taken, i.e. R < 0 */
				344
				345	Lgot_result:
				346
				347	retl
				348	mov %o2, %o0 /* delay slot: return Q in %o0 */
				349
				350	.globl .udiv_patch
				351	.udiv_patch:
					/*
					 * Hardware-divide version: %o0 = %o0 / %o1 using the udiv
					 * instruction, which takes %y as the upper 32 bits of the
					 * dividend, hence the clear of %y first.
					 * NOTE(review): presumably this sequence is copied over .udiv
					 * on CPUs that implement udiv; the patching code is not in
					 * this file, so confirm against the caller.
					 * NOTE(review): the instructions between wr and udiv presumably
					 * cover the delayed write of %y; confirm against the V8 manual.
					 */
				352	wr %g0, 0x0, %y /* %y = 0 */
				353	nop
				354	nop
				355	retl
				356	udiv %o0, %o1, %o0 /* delay slot: quotient returned in %o0 */
				357	nop