Blame - arch/sparc/lib/rem.S - kernel/msm-4.19

blob: 44508148d055718d3f8c6f56b693ae1c1d10ffd7 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/* $Id: rem.S,v 1.7 1996/09/30 02:22:34 davem Exp $
				2	* rem.S: This routine was taken from glibc-1.09 and is covered
				3	* by the GNU Library General Public License Version 2.
				4	*/
				5
				6
				7	/* This file is generated from divrem.m4; DO NOT EDIT! */
				8	/*
				9	* Division and remainder, from Appendix E of the Sparc Version 8
				10	* Architecture Manual, with fixes from Gordon Irlam.
				11	*/
				12
				13	/*
				14	* Input: dividend and divisor in %o0 and %o1 respectively.
				15	*
				16	* m4 parameters:
				17	* .rem name of function to generate
				18	* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
				19	* true true=true => signed; true=false => unsigned
				20	*
				21	* Algorithm parameters:
				22	* N how many bits per iteration we try to get (4)
				23	* WORDSIZE total number of bits (32)
				24	*
				25	* Derived constants:
				26	* TOPBITS number of bits in the top decade of a number
				27	*
				28	* Important variables:
				29	* Q the partial quotient under development (initially 0)
				30	* R the remainder so far, initially the dividend
				31	* ITER number of main division loop iterations required;
				32	* equal to ceil(log2(quotient) / N). Note that this
				33	* is the log base (2^N) of the quotient.
				34	* V the current comparand, initially divisor*2^(ITER*N-1)
				35	*
				36	* Cost:
				37	* Current estimate for non-large dividend is
				38	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
				39	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
				40	* different path, as the upper bits of the quotient must be developed
				41	* one bit at a time.
				42	*/
				43
				44
				45	.globl .rem
					/*
					 * .rem -- signed 32-bit remainder, %o0 = %o0 % %o1.
					 *
					 * In:   %o0 = dividend, %o1 = divisor.
					 * Out:  %o0 = remainder.  The result is negated at Lgot_result when
					 *       the original dividend (saved in %g2) was negative, so the
					 *       remainder carries the sign of the dividend.
					 * Zero divisor: issues software trap ST_DIV0 (symbol defined outside
					 *       this view); if the trap returns, the routine returns 0.
					 * Leaf routine: returns through retl, no save/restore is performed.
					 * Registers written: %o0-%o5, %g1, %g2, %g7 and the integer
					 *       condition codes.
					 *
					 * Working registers:
					 *   %o3  R, running remainder (starts as |dividend|)
					 *   %o5  V, comparand (starts as |divisor|, then shifted)
					 *   %o2  Q, partial quotient (built up but not returned here)
					 *   %o4  number of 4-bit passes through Ldivloop
					 *   %g7  number of single-bit steps on the large-dividend path
					 *   %g1  threshold 1 << 27 (later shifted to 1 << 31)
					 *   %g2  copy of the original dividend, used only for its sign
					 *
					 * The instruction following each branch sits in its delay slot.
					 * For the ",a" (annulled) conditional branches used here, that
					 * instruction runs only when the branch is taken.
					 */
				46	.rem:
				47	! compute sign of result; if neither is negative, no problem
				48	orcc %o1, %o0, %g0 ! either negative?
				49	bge 2f ! no, go do the divide
				50	mov %o0, %g2 ! compute sign in any case
				51
				52	tst %o1
				53	bge 1f
				54	tst %o0
				55	! %o1 is definitely negative; %o0 might also be negative
				56	bge 2f ! if %o0 not negative...
				57	sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
				58	1: ! %o0 is negative, %o1 is nonnegative
				59	sub %g0, %o0, %o0 ! make %o0 nonnegative
				60	2:
				61
				62	! Ready to divide. Compute size of quotient; scale comparand.
					/* %o5 = |divisor| (condition codes test it for zero); the delay slot copies |dividend| into %o3. */
				63	orcc %o1, %g0, %o5
				64	bne 1f
				65	mov %o0, %o3
				66
				67	! Divide by zero trap. If it returns, return 0 (about as
				68	! wrong as possible, but that is what SunOS does...).
				69	ta ST_DIV0
				70	retl
				71	clr %o0
				72
				73	1:
					/* Unsigned compare: when |dividend| < |divisor| the remainder is already in %o3. %o2 (Q) is cleared in the delay slot. */
				74	cmp %o3, %o5 ! if %o1 exceeds %o0, done
				75	blu Lgot_result ! (and algorithm fails otherwise)
				76	clr %o2
				77
					/* %g1 = 1 << 27.  Dividends below it use the 4-bits-per-pass loop at Lnot_really_big; others fall through. %o4 is cleared in the delay slot. */
				78	sethi %hi(1 << (32 - 4 - 1)), %g1
				79
				80	cmp %o3, %g1
				81	blu Lnot_really_big
				82	clr %o4
				83
				84	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
				85	! as our usual N-at-a-shot divide step will cause overflow and havoc.
				86	! The number of bits in the result here is N*ITER+SC, where SC <= N.
				87	! Compute ITER in an unorthodox manner: know we need to shift V into
				88	! the top decade: so do not even bother to compare to R.
					/* Shift %o5 left 4 bits per pass, counting passes in %o4, until %o5 >= %g1. %g7 is set to 1 in the delay slot on every pass. */
				89	1:
				90	cmp %o5, %g1
				91	bgeu 3f
				92	mov 1, %g7
				93
				94	sll %o5, 4, %o5
				95
				96	b 1b
				97	add %o4, 1, %o4
				98
				99	! Now compute %g7.
					/* Double %o5 (addcc sets carry when the top bit is shifted out) and count the doubling in %g7. */
				100	2:
				101	addcc %o5, %o5, %o5
				102
				103	bcc Lnot_too_big
				104	add %g7, 1, %g7
				105
				106	! We get here if the %o1 overflowed while shifting.
				107	! This means that %o3 has the high-order bit set.
				108	! Restore %o5 and subtract from %o3.
					/* %g1 becomes 1 << 31; adding it to (%o5 >> 1) rebuilds the value %o5 had before the doubling. The delay slot undoes the %g7 increment. */
				109	sll %g1, 4, %g1 ! high order bit
				110	srl %o5, 1, %o5 ! rest of %o5
				111	add %o5, %g1, %o5
				112
				113	b Ldo_single_div
				114	sub %g7, 1, %g7
				115
				116	Lnot_too_big:
				117	3:
					/* Keep doubling while %o5 < %o3 (unsigned); stop when %o5 >= %o3. */
				118	cmp %o5, %o3
				119	blu 2b
				120	nop
				121
				122	be Ldo_single_div
				123	nop
				124	/* NB: these are commented out in the V8-Sparc manual as well */
				125	/* (I do not understand this) */
				126	! %o5 > %o3: went too far: back up 1 step
				127	! srl %o5, 1, %o5
				128	! dec %g7
				129	! do single-bit divide steps
				130	!
				131	! We have to be careful here. We know that %o3 >= %o5, so we can do the
				132	! first divide step without thinking. BUT, the others are conditional,
				133	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
				134	! order bit set in the first step, just falling into the regular
				135	! division loop will mess up the first time around.
				136	! So we unroll slightly...
				137	Ldo_single_div:
					/* If %g7 - 1 is negative there are no single-bit steps: go straight to the 4-bit loop control. */
				138	subcc %g7, 1, %g7
				139	bl Lend_regular_divide
				140	nop
				141
					/* Unconditional first step: subtract V and record quotient bit 1. */
				142	sub %o3, %o5, %o3
				143	mov 1, %o2
				144
				145	b Lend_single_divloop
				146	nop
				147	Lsingle_divloop:
					/* One non-restoring step: Q is doubled, V is halved in the delay slot, then R -= V and Q += 1 when R >= 0, else R += V and Q -= 1. */
				148	sll %o2, 1, %o2
				149
				150	bl 1f
				151	srl %o5, 1, %o5
				152	! %o3 >= 0
				153	sub %o3, %o5, %o3
				154
				155	b 2f
				156	add %o2, 1, %o2
				157	1: ! %o3 < 0
				158	add %o3, %o5, %o3
				159	sub %o2, 1, %o2
				160	2:
				161	Lend_single_divloop:
					/* Loop while the decremented %g7 is >= 0; the tst in the delay slot provides the sign of %o3 tested by "bl 1f" above. */
				162	subcc %g7, 1, %g7
				163	bge Lsingle_divloop
				164	tst %o3
				165
				166	b,a Lend_regular_divide
				167
				168	Lnot_really_big:
					/*
					 * Shift %o5 left 4 bits per pass while %o5 <= %o3 (unsigned).  The
					 * addcc in the delay slot runs on every pass and counts it in %o4;
					 * "be" then tests the Z flag left by that addcc, and its delay slot
					 * takes the last increment back off %o4.
					 */
				169	1:
				170	sll %o5, 4, %o5
				171	cmp %o5, %o3
				172	bleu 1b
				173	addcc %o4, 1, %o4
				174	be Lgot_result
				175	sub %o4, 1, %o4
				176
				177	tst %o3 ! set up for initial iteration
					/*
					 * Each pass through Ldivloop develops 4 quotient bits with an unrolled
					 * decision tree of non-restoring steps.  At every depth V (%o5) is
					 * halved in the branch delay slot, then R (%o3) has V subtracted if R
					 * was non-negative or added if it was negative.  Labels are named
					 * L.<depth>.<16 + accumulated bits>; each leaf adds
					 * (accumulated*2 +/- 1) to the quotient in %o2 and jumps to 9.
					 */
				178	Ldivloop:
				179	sll %o2, 4, %o2
				180	! depth 1, accumulated bits 0
				181	bl L.1.16
				182	srl %o5,1,%o5
				183	! remainder is positive
				184	subcc %o3,%o5,%o3
				185	! depth 2, accumulated bits 1
				186	bl L.2.17
				187	srl %o5,1,%o5
				188	! remainder is positive
				189	subcc %o3,%o5,%o3
				190	! depth 3, accumulated bits 3
				191	bl L.3.19
				192	srl %o5,1,%o5
				193	! remainder is positive
				194	subcc %o3,%o5,%o3
				195	! depth 4, accumulated bits 7
				196	bl L.4.23
				197	srl %o5,1,%o5
				198	! remainder is positive
				199	subcc %o3,%o5,%o3
				200
				201	b 9f
				202	add %o2, (7*2+1), %o2
				203
				204	L.4.23:
				205	! remainder is negative
				206	addcc %o3,%o5,%o3
				207	b 9f
				208	add %o2, (7*2-1), %o2
				209
				210	L.3.19:
				211	! remainder is negative
				212	addcc %o3,%o5,%o3
				213	! depth 4, accumulated bits 5
				214	bl L.4.21
				215	srl %o5,1,%o5
				216	! remainder is positive
				217	subcc %o3,%o5,%o3
				218	b 9f
				219	add %o2, (5*2+1), %o2
				220
				221	L.4.21:
				222	! remainder is negative
				223	addcc %o3,%o5,%o3
				224	b 9f
				225	add %o2, (5*2-1), %o2
				226
				227	L.2.17:
				228	! remainder is negative
				229	addcc %o3,%o5,%o3
				230	! depth 3, accumulated bits 1
				231	bl L.3.17
				232	srl %o5,1,%o5
				233	! remainder is positive
				234	subcc %o3,%o5,%o3
				235	! depth 4, accumulated bits 3
				236	bl L.4.19
				237	srl %o5,1,%o5
				238	! remainder is positive
				239	subcc %o3,%o5,%o3
				240	b 9f
				241	add %o2, (3*2+1), %o2
				242
				243	L.4.19:
				244	! remainder is negative
				245	addcc %o3,%o5,%o3
				246	b 9f
				247	add %o2, (3*2-1), %o2
				248
				249	L.3.17:
				250	! remainder is negative
				251	addcc %o3,%o5,%o3
				252	! depth 4, accumulated bits 1
				253	bl L.4.17
				254	srl %o5,1,%o5
				255	! remainder is positive
				256	subcc %o3,%o5,%o3
				257	b 9f
				258	add %o2, (1*2+1), %o2
				259
				260	L.4.17:
				261	! remainder is negative
				262	addcc %o3,%o5,%o3
				263	b 9f
				264	add %o2, (1*2-1), %o2
				265
				266	L.1.16:
				267	! remainder is negative
				268	addcc %o3,%o5,%o3
				269	! depth 2, accumulated bits -1
				270	bl L.2.15
				271	srl %o5,1,%o5
				272	! remainder is positive
				273	subcc %o3,%o5,%o3
				274	! depth 3, accumulated bits -1
				275	bl L.3.15
				276	srl %o5,1,%o5
				277	! remainder is positive
				278	subcc %o3,%o5,%o3
				279	! depth 4, accumulated bits -1
				280	bl L.4.15
				281	srl %o5,1,%o5
				282	! remainder is positive
				283	subcc %o3,%o5,%o3
				284	b 9f
				285	add %o2, (-1*2+1), %o2
				286
				287	L.4.15:
				288	! remainder is negative
				289	addcc %o3,%o5,%o3
				290	b 9f
				291	add %o2, (-1*2-1), %o2
				292
				293	L.3.15:
				294	! remainder is negative
				295	addcc %o3,%o5,%o3
				296	! depth 4, accumulated bits -3
				297	bl L.4.13
				298	srl %o5,1,%o5
				299	! remainder is positive
				300	subcc %o3,%o5,%o3
				301	b 9f
				302	add %o2, (-3*2+1), %o2
				303
				304	L.4.13:
				305	! remainder is negative
				306	addcc %o3,%o5,%o3
				307	b 9f
				308	add %o2, (-3*2-1), %o2
				309
				310	L.2.15:
				311	! remainder is negative
				312	addcc %o3,%o5,%o3
				313	! depth 3, accumulated bits -3
				314	bl L.3.13
				315	srl %o5,1,%o5
				316	! remainder is positive
				317	subcc %o3,%o5,%o3
				318	! depth 4, accumulated bits -5
				319	bl L.4.11
				320	srl %o5,1,%o5
				321	! remainder is positive
				322	subcc %o3,%o5,%o3
				323	b 9f
				324	add %o2, (-5*2+1), %o2
				325
				326	L.4.11:
				327	! remainder is negative
				328	addcc %o3,%o5,%o3
				329	b 9f
				330	add %o2, (-5*2-1), %o2
				331
				332
				333	L.3.13:
				334	! remainder is negative
				335	addcc %o3,%o5,%o3
				336	! depth 4, accumulated bits -7
				337	bl L.4.9
				338	srl %o5,1,%o5
				339	! remainder is positive
				340	subcc %o3,%o5,%o3
				341	b 9f
				342	add %o2, (-7*2+1), %o2
				343
				344	L.4.9:
				345	! remainder is negative
				346	addcc %o3,%o5,%o3
				347	b 9f
				348	add %o2, (-7*2-1), %o2
				349
				350	9:
				351	Lend_regular_divide:
					/*
					 * Loop control shared by both paths: repeat Ldivloop while the
					 * decremented %o4 is >= 0.  The tst in the delay slot leaves the sign
					 * of %o3 in the condition codes, both for the first "bl" of the next
					 * pass and for the fix-up test that follows the loop.
					 */
				352	subcc %o4, 1, %o4
				353	bge Ldivloop
				354	tst %o3
				355
					/* Annulled branch: the add runs only when %o3 is negative, adding |divisor| (%o1) back once; execution reaches Lgot_result either way. */
				356	bl,a Lgot_result
				357	! non-restoring fixup here (one instruction only!)
				358	add %o3, %o1, %o3
				359
				360	Lgot_result:
				361	! check to see if answer should be < 0
					/* %g2 holds the original dividend: the annulled delay slot negates %o3 only when it was negative. The remainder is moved to %o0 in the retl delay slot. */
				362	tst %g2
				363	bl,a 1f
				364	sub %g0, %o3, %o3
				365	1:
				366	retl
				367	mov %o3, %o0
				368
				369	.globl .rem_patch
					/*
					 * .rem_patch -- signed remainder using the sdivcc/smul instructions.
					 *
					 * In:   %o0 = dividend, %o1 = divisor.
					 * Out:  %o0 = %o0 - (quotient * %o1).
					 * Registers written: %o0, %o2, %o4, %y and the integer condition codes.
					 *
					 * NOTE(review): the name suggests this sequence is installed in place
					 * of .rem on CPUs that have hardware multiply/divide; the patching
					 * code is not visible here -- confirm before changing the length or
					 * order of these instructions.
					 */
				370	.rem_patch:
					/* %o4 = %o0 shifted right arithmetically by 31, i.e. 0 or -1; written to %y it forms the upper word of the 64-bit dividend %y:%o0. */
				371	sra %o0, 0x1f, %o4
				372	wr %o4, 0x0, %y
					/* Three nops separate the write to %y from the divide that reads it (writes to %y may be delayed by up to three instructions on SPARC V8). */
				373	nop
				374	nop
				375	nop
					/* %o2 = quotient; the "cc" form also sets the overflow flag tested next. */
				376	sdivcc %o0, %o1, %o2
					/* Annulled branch: the xnor (bitwise complement of %o2) runs only when overflow was signalled. */
				377	bvs,a 1f
				378	xnor %o2, %g0, %o2
					/* %o2 = quotient * divisor; the subtraction in the retl delay slot leaves the remainder in %o0. */
				379	1: smul %o2, %o1, %o2
				380	retl
				381	sub %o0, %o2, %o0
					/* Not reached by fall-through (it follows the retl delay slot). NOTE(review): presumably padding to a fixed patch length -- confirm. */
				382	nop