Blame - arch/sparc/lib/urem.S - kernel/msm

blob: 6b92bdc8b04cc12c5ab994aa962aa6e38938e41d [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $
				2	* urem.S: This routine was taken from glibc-1.09 and is covered
				3	* by the GNU Library General Public License Version 2.
				4	*/
				5
				6	/* This file is generated from divrem.m4; DO NOT EDIT! */
				7	/*
				8	* Division and remainder, from Appendix E of the Sparc Version 8
				9	* Architecture Manual, with fixes from Gordon Irlam.
				10	*/
				11
				12	/*
				13	* Input: dividend and divisor in %o0 and %o1 respectively.
				14	*
				15	* m4 parameters:
				16	* .urem name of function to generate
				17	* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
				18	* false false=true => signed; false=false => unsigned
				19	*
				20	* Algorithm parameters:
				21	* N how many bits per iteration we try to get (4)
				22	* WORDSIZE total number of bits (32)
				23	*
				24	* Derived constants:
				25	* TOPBITS number of bits in the top decade of a number
				26	*
				27	* Important variables:
				28	* Q the partial quotient under development (initially 0)
				29	* R the remainder so far, initially the dividend
				30	* ITER number of main division loop iterations required;
				31	* equal to ceil(log2(quotient) / N). Note that this
				32	* is the log base (2^N) of the quotient.
				33	* V the current comparand, initially divisor*2^(ITER*N-1)
				34	*
				35	* Cost:
				36	* Current estimate for non-large dividend is
				37	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
				38	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
				39	* different path, as the upper bits of the quotient must be developed
				40	* one bit at a time.
				41	*/
				42
				43	.globl .urem /* .urem: unsigned remainder %o0 % %o1 (dividend in %o0, divisor in %o1); result is returned in %o0 */
Al Viro	7caaeab	2005-09-11 20:14:07 -0700	[diff] [blame]	44	.globl _Urem
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	.urem:
Al Viro	7caaeab	2005-09-11 20:14:07 -0700	[diff] [blame]	46	_Urem: /* needed for export */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	47
				48	! Ready to divide. Compute size of quotient; scale comparand.
				49	orcc %o1, %g0, %o5 /* %o5 = divisor (working comparand V); sets the codes tested by bne */
				50	bne 1f
				51	mov %o0, %o3 /* delay slot: %o3 = dividend (running remainder R) */
				52
				53	! Divide by zero trap. If it returns, return 0 (about as
				54	! wrong as possible, but that is what SunOS does...).
				55	ta ST_DIV0
				56	retl
				57	clr %o0 /* delay slot: return value is 0 when the trap returns */
				58
				59	1:
				60	cmp %o3, %o5 ! if %o1 exceeds %o0, done
				61	blu Lgot_result ! (and algorithm fails otherwise)
				62	clr %o2 /* delay slot: %o2 = 0 (partial quotient Q) */
				63
				64	sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27; dividends below this take Lnot_really_big */
				65
				66	cmp %o3, %g1
				67	blu Lnot_really_big
				68	clr %o4 /* delay slot: %o4 = 0 (count of 4-bit iterations, ITER) */
				69
				70	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
				71	! as our usual N-at-a-shot divide step will cause overflow and havoc.
				72	! The number of bits in the result here is N*ITER+SC, where SC <= N.
				73	! Compute ITER in an unorthodox manner: know we need to shift V into
				74	! the top decade: so do not even bother to compare to R.
				75	1:
				76	cmp %o5, %g1
				77	bgeu 3f
				78	mov 1, %g7 /* delay slot: %g7 = 1 (count of single-bit steps, consumed in Ldo_single_div) */
				79
				80	sll %o5, 4, %o5 /* scale the comparand by 2^4 */
				81
				82	b 1b
				83	add %o4, 1, %o4 /* delay slot: one more 4-bit iteration will be needed */
				84
				85	! Now compute %g7.
				86	2:
				87	addcc %o5, %o5, %o5 /* double the comparand; carry set means a bit fell off the top */
				88	bcc Lnot_too_big
				89	add %g7, 1, %g7 /* delay slot: count one more single-bit step */
				90
				91	! We get here if the %o1 overflowed while shifting.
				92	! This means that %o3 has the high-order bit set.
				93	! Restore %o5 and subtract from %o3.
				94	sll %g1, 4, %g1 ! high order bit
				95	srl %o5, 1, %o5 ! rest of %o5
				96	add %o5, %g1, %o5
				97
				98	b Ldo_single_div
				99	sub %g7, 1, %g7 /* delay slot: take back the count for the doubling that overflowed */
				100
				101	Lnot_too_big:
				102	3:
				103	cmp %o5, %o3 /* keep doubling while the comparand is below the remainder */
				104	blu 2b
				105	nop
				106
				107	be Ldo_single_div
				108	nop
				109	/* NB: these are commented out in the V8-Sparc manual as well */
				110	/* (I do not understand this) */
				111	! %o5 > %o3: went too far: back up 1 step
				112	! srl %o5, 1, %o5
				113	! dec %g7
				114	! do single-bit divide steps
				115	!
				116	! We have to be careful here. We know that %o3 >= %o5, so we can do the
				117	! first divide step without thinking. BUT, the others are conditional,
				118	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
				119	! order bit set in the first step, just falling into the regular
				120	! division loop will mess up the first time around.
				121	! So we unroll slightly...
				122	Ldo_single_div:
				123	subcc %g7, 1, %g7 /* consume one single-bit step; a negative result means none are left */
				124	bl Lend_regular_divide
				125	nop
				126
				127	sub %o3, %o5, %o3 /* first step is unconditional: R -= V */
				128	mov 1, %o2 /* ...and its quotient bit is 1 */
				129
				130	b Lend_single_divloop
				131	nop
				132	Lsingle_divloop:
				133	sll %o2, 1, %o2 /* make room for the next quotient bit; bl below still sees the codes from tst %o3 */
				134	bl 1f
				135	srl %o5, 1, %o5 /* delay slot: halve the comparand for this step */
				136	! %o3 >= 0
				137	sub %o3, %o5, %o3
				138	b 2f
				139	add %o2, 1, %o2
				140	1: ! %o3 < 0
				141	add %o3, %o5, %o3
				142	sub %o2, 1, %o2
				143	2:
				144	Lend_single_divloop:
				145	subcc %g7, 1, %g7
				146	bge Lsingle_divloop
				147	tst %o3 /* delay slot: set the codes on the sign of R for the next step */
				148
				149	b,a Lend_regular_divide /* single-bit steps done: join the tail of the 4-bit loop */
				150
				151	Lnot_really_big:
				152	1:
				153	sll %o5, 4, %o5 /* scale the comparand by 2^4 per pass */
				154
				155	cmp %o5, %o3
				156	bleu 1b
				157	addcc %o4, 1, %o4 /* delay slot: count the pass; these codes are what the be below tests */
				158
				159	be Lgot_result
				160	sub %o4, 1, %o4 /* delay slot: %o4 -= 1 before entering Ldivloop */
				161
				162	tst %o3 ! set up for initial iteration
				163	Ldivloop:
				164	sll %o2, 4, %o2 /* make room for four new quotient bits; the tree below develops them one at a time */
				165	! depth 1, accumulated bits 0
				166	bl L.1.16
				167	srl %o5,1,%o5
				168	! remainder is positive
				169	subcc %o3,%o5,%o3
				170	! depth 2, accumulated bits 1
				171	bl L.2.17
				172	srl %o5,1,%o5
				173	! remainder is positive
				174	subcc %o3,%o5,%o3
				175	! depth 3, accumulated bits 3
				176	bl L.3.19
				177	srl %o5,1,%o5
				178	! remainder is positive
				179	subcc %o3,%o5,%o3
				180	! depth 4, accumulated bits 7
				181	bl L.4.23
				182	srl %o5,1,%o5
				183	! remainder is positive
				184	subcc %o3,%o5,%o3
				185	b 9f
				186	add %o2, (7*2+1), %o2
				187
				188	L.4.23:
				189	! remainder is negative
				190	addcc %o3,%o5,%o3
				191	b 9f
				192	add %o2, (7*2-1), %o2
				193
				194	L.3.19:
				195	! remainder is negative
				196	addcc %o3,%o5,%o3
				197	! depth 4, accumulated bits 5
				198	bl L.4.21
				199	srl %o5,1,%o5
				200	! remainder is positive
				201	subcc %o3,%o5,%o3
				202	b 9f
				203	add %o2, (5*2+1), %o2
				204
				205	L.4.21:
				206	! remainder is negative
				207	addcc %o3,%o5,%o3
				208	b 9f
				209	add %o2, (5*2-1), %o2
				210
				211	L.2.17:
				212	! remainder is negative
				213	addcc %o3,%o5,%o3
				214	! depth 3, accumulated bits 1
				215	bl L.3.17
				216	srl %o5,1,%o5
				217	! remainder is positive
				218	subcc %o3,%o5,%o3
				219	! depth 4, accumulated bits 3
				220	bl L.4.19
				221	srl %o5,1,%o5
				222	! remainder is positive
				223	subcc %o3,%o5,%o3
				224	b 9f
				225	add %o2, (3*2+1), %o2
				226
				227	L.4.19:
				228	! remainder is negative
				229	addcc %o3,%o5,%o3
				230	b 9f
				231	add %o2, (3*2-1), %o2
				232
				233	L.3.17:
				234	! remainder is negative
				235	addcc %o3,%o5,%o3
				236	! depth 4, accumulated bits 1
				237	bl L.4.17
				238	srl %o5,1,%o5
				239	! remainder is positive
				240	subcc %o3,%o5,%o3
				241	b 9f
				242	add %o2, (1*2+1), %o2
				243
				244	L.4.17:
				245	! remainder is negative
				246	addcc %o3,%o5,%o3
				247	b 9f
				248	add %o2, (1*2-1), %o2
				249
				250	L.1.16:
				251	! remainder is negative
				252	addcc %o3,%o5,%o3
				253	! depth 2, accumulated bits -1
				254	bl L.2.15
				255	srl %o5,1,%o5
				256	! remainder is positive
				257	subcc %o3,%o5,%o3
				258	! depth 3, accumulated bits -1
				259	bl L.3.15
				260	srl %o5,1,%o5
				261	! remainder is positive
				262	subcc %o3,%o5,%o3
				263	! depth 4, accumulated bits -1
				264	bl L.4.15
				265	srl %o5,1,%o5
				266	! remainder is positive
				267	subcc %o3,%o5,%o3
				268	b 9f
				269	add %o2, (-1*2+1), %o2
				270
				271	L.4.15:
				272	! remainder is negative
				273	addcc %o3,%o5,%o3
				274	b 9f
				275	add %o2, (-1*2-1), %o2
				276
				277	L.3.15:
				278	! remainder is negative
				279	addcc %o3,%o5,%o3
				280	! depth 4, accumulated bits -3
				281	bl L.4.13
				282	srl %o5,1,%o5
				283	! remainder is positive
				284	subcc %o3,%o5,%o3
				285	b 9f
				286	add %o2, (-3*2+1), %o2
				287
				288	L.4.13:
				289	! remainder is negative
				290	addcc %o3,%o5,%o3
				291	b 9f
				292	add %o2, (-3*2-1), %o2
				293
				294	L.2.15:
				295	! remainder is negative
				296	addcc %o3,%o5,%o3
				297	! depth 3, accumulated bits -3
				298	bl L.3.13
				299	srl %o5,1,%o5
				300	! remainder is positive
				301	subcc %o3,%o5,%o3
				302	! depth 4, accumulated bits -5
				303	bl L.4.11
				304	srl %o5,1,%o5
				305	! remainder is positive
				306	subcc %o3,%o5,%o3
				307	b 9f
				308	add %o2, (-5*2+1), %o2
				309
				310	L.4.11:
				311	! remainder is negative
				312	addcc %o3,%o5,%o3
				313	b 9f
				314	add %o2, (-5*2-1), %o2
				315
				316	L.3.13:
				317	! remainder is negative
				318	addcc %o3,%o5,%o3
				319	! depth 4, accumulated bits -7
				320	bl L.4.9
				321	srl %o5,1,%o5
				322	! remainder is positive
				323	subcc %o3,%o5,%o3
				324	b 9f
				325	add %o2, (-7*2+1), %o2
				326
				327	L.4.9:
				328	! remainder is negative
				329	addcc %o3,%o5,%o3
				330	b 9f
				331	add %o2, (-7*2-1), %o2
				332
				333	9:
				334	Lend_regular_divide:
				335	subcc %o4, 1, %o4 /* one 4-bit iteration consumed; loop while the count stays >= 0 */
				336	bge Ldivloop
				337	tst %o3 /* delay slot: codes reflect the sign of R, for Ldivloop and for the bl,a below */
				338
				339	bl,a Lgot_result
				340	! non-restoring fixup here (one instruction only!)
				341	add %o3, %o1, %o3 /* annulled delay slot: add the divisor back only when R ended up negative */
				342
				343	Lgot_result:
				344
				345	retl
				346	mov %o3, %o0 /* delay slot: return the remainder in %o0 */
				347
				348	.globl .urem_patch /* NOTE(review): name suggests a hardware-divide replacement body for .urem; how it is installed is not visible here */
				349	.urem_patch:
				350	wr %g0, 0x0, %y /* write 0 to %y; presumably the upper half of udiv's dividend - confirm against the V8 manual */
				351	nop /* three nops; presumably cover the delay between writing %y and using it - TODO confirm */
				352	nop
				353	nop
				354	udiv %o0, %o1, %o2 /* %o2 = %o0 / %o1 (unsigned quotient) */
				355	umul %o2, %o1, %o2 /* %o2 = quotient * divisor */
				356	retl
				357	sub %o0, %o2, %o0 /* delay slot: %o0 = dividend - quotient * divisor, i.e. the remainder */