Blame - arch/blackfin/lib/udivsi3.S - kernel/msm-4.9

blob: 58fd96d747665fd8a2f3c8568de1af547c970213 [file] [log] [blame]

Bryan Wu	1394f03	2007-05-06 14:50:22 -0700	[diff] [blame]	1	/*
				2	* File: arch/blackfin/lib/udivsi3.S
				3	* Based on:
				4	* Author:
				5	*
				6	* Created:
				7	* Description:
				8	*
				9	* Modified:
				10	* Copyright 2004-2006 Analog Devices Inc.
				11	*
				12	* Bugs: Enter bugs at http://blackfin.uclinux.org/
				13	*
				14	* This program is free software; you can redistribute it and/or modify
				15	* it under the terms of the GNU General Public License as published by
				16	* the Free Software Foundation; either version 2 of the License, or
				17	* (at your option) any later version.
				18	*
				19	* This program is distributed in the hope that it will be useful,
				20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				22	* GNU General Public License for more details.
				23	*
				24	* You should have received a copy of the GNU General Public License
				25	* along with this program; if not, see the file COPYING, or write
				26	* to the Free Software Foundation, Inc.,
				27	* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				28	*/
				29
				30	#include <linux/linkage.h>
				31
				32	#define CARRY AC0
				33
				34	#ifdef CONFIG_ARITHMETIC_OPS_L1
				35	.section .l1.text
				36	#else
				37	.text
				38	#endif
				39
				40
				41	ENTRY(___udivsi3)
				42
				43	CC = R0 < R1 (IU); /* If X < Y, always return 0 */
				44	IF CC JUMP .Lreturn_ident;
				45
				46	R2 = R1 << 16;
				47	CC = R2 <= R0 (IU);
				48	IF CC JUMP .Lidents;
				49
				50	R2 = R0 >> 31; /* if X is a 31-bit number */
				51	R3 = R1 >> 15; /* and Y is a 15-bit number */
				52	R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
				53	CC = R2;
				54	IF CC JUMP .Ly_16bit;
				55
				56	/* METHOD 1: FAST DIVQ
				57	We know we have a 31-bit dividend, and 15-bit divisor so we can use the
				58	simple divq approach (first setting AQ to 0 - implying unsigned division,
				59	then 16 DIVQ's).
				60	*/
				61
				62	AQ = CC; /* Clear AQ (CC==0) */
				63
				64	/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
				65	we need to shift the dividend one bit to the left.
				66	We have already checked that we have a 31-bit number so we are safe to do
				67	that.
				68	*/
				69	R0 <<= 1;
				70	DIVQ(R0, R1); // 1
				71	DIVQ(R0, R1); // 2
				72	DIVQ(R0, R1); // 3
				73	DIVQ(R0, R1); // 4
				74	DIVQ(R0, R1); // 5
				75	DIVQ(R0, R1); // 6
				76	DIVQ(R0, R1); // 7
				77	DIVQ(R0, R1); // 8
				78	DIVQ(R0, R1); // 9
				79	DIVQ(R0, R1); // 10
				80	DIVQ(R0, R1); // 11
				81	DIVQ(R0, R1); // 12
				82	DIVQ(R0, R1); // 13
				83	DIVQ(R0, R1); // 14
				84	DIVQ(R0, R1); // 15
				85	DIVQ(R0, R1); // 16
				86	R0 = R0.L (Z);
				87	RTS;
				88
				89	.Ly_16bit:
				90	/* We know that the upper 17 bits of Y might have bits set,
				91	** or that the sign bit of X might have a bit. If Y is a
				92	** 16-bit number, but not bigger, then we can use the builtins
				93	** with a post-divide correction.
				94	** R3 currently holds Y>>15, which means R3's LSB is the
				95	** bit we're interested in.
				96	*/
				97
				98	/* According to the ISR, to use the Divide primitives for
				99	** unsigned integer divide, the useable range is 31 bits
				100	*/
				101	CC = ! BITTST(R0, 31);
				102
				103	/* IF condition is true we can scale our inputs and use the divide primitives,
				104	** with some post-adjustment
				105	*/
				106	R3 += -1; /* if so, Y is 0x00008nnn */
				107	CC &= AZ;
				108
				109	/* If condition is true we can scale our inputs and use the divide primitives,
				110	** with some post-adjustment
				111	*/
				112	R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
				113	R2 = R0 >> 16;
				114
				115	R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
				116	CC &= CARRY;
				117	IF CC JUMP .Lshift_and_correct;
				118
				119	/* Fall through to the identities */
				120
				121	/* METHOD 2: identities and manual calculation
				122	We are not able to use the divide primitives, but may still catch some special
				123	cases.
				124	*/
				125	.Lidents:
				126	/* Test for common identities. Value to be returned is placed in R2. */
				127	CC = R0 == 0; /* 0/Y => 0 */
				128	IF CC JUMP .Lreturn_r0;
				129	CC = R0 == R1; /* X==Y => 1 */
				130	IF CC JUMP .Lreturn_ident;
				131	CC = R1 == 1; /* X/1 => X */
				132	IF CC JUMP .Lreturn_ident;
				133
				134	R2.L = ONES R1;
				135	R2 = R2.L (Z);
				136	CC = R2 == 1;
				137	IF CC JUMP .Lpower_of_two;
				138
				139	[--SP] = (R7:5); /* Push registers R5-R7 */
				140
				141	/* Idents don't match. Go for the full operation. */
				142
				143
				144	R6 = 2; /* assume we'll shift two */
				145	R3 = 1;
				146
				147	P2 = R1;
				148	/* If either R0 or R1 have sign set, */
				149	/* divide them by two, and note it's */
				150	/* been done. */
				151	CC = R1 < 0;
				152	R2 = R1 >> 1;
				153	IF CC R1 = R2; /* Possibly-shifted R1 */
				154	IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
				155
				156	P0 = 0;
				157	R3 = -R1;
				158	[--SP] = R3;
				159	R2 = R0 >> 1;
				160	R2 = R0 >> 1;
				161	CC = R0 < 0;
				162	IF CC P0 = R6; /* Number of values divided */
				163	IF !CC R2 = R0; /* Shifted R0 */
				164
				165	/* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
				166
				167	/* r2 holds Copy dividend */
				168	R3 = 0; /* Clear partial remainder */
				169	R7 = 0; /* Initialise quotient bit */
				170
				171	P1 = 32; /* Set loop counter */
				172	LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
				173	.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
				174	R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
				175	R3 = R3 << 1 || R5 = [SP];
				176	R3 = R3 \| R6; /* Include any carry */
				177	CC = R7 < 0; /* Check quotient(AQ) */
				178	/* If AQ==0, we'll sub divisor */
				179	IF CC R5 = R1; /* and if AQ==1, we'll add it. */
				180	R3 = R3 + R5; /* Add/sub divisor to partial remainder */
				181	R7 = R3 ^ R1; /* Generate next quotient bit */
				182
				183	R5 = R7 >> 31; /* Get AQ */
				184	BITTGL(R5, 0); /* Invert it, to get what we'll shift */
				185	.Lulend: R2 = R2 + R5; /* and "shift" it in. */
				186
				187	CC = P0 == 0; /* Check how many inputs we shifted */
				188	IF CC JUMP .Lno_mult; /* if none... */
				189	R6 = R2 << 1;
				190	CC = P0 == 1;
				191	IF CC R2 = R6; /* if 1, Q = Q*2 */
				192	IF !CC R1 = P2; /* if 2, restore stored divisor */
				193
				194	R3 = R2; /* Copy of R2 */
				195	R3 *= R1; /* Q * divisor */
				196	R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
				197	CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
				198	R6 = CC; /* if yes, R6 = 1 */
				199	R2 = R2 + R6; /* if yes, add one to quotient(Q) */
				200	.Lno_mult:
				201	SP += 4;
				202	(R7:5) = [SP++]; /* Pop registers R5-R7 */
				203	R0 = R2; /* Store quotient */
				204	RTS;
				205
				206	.Lreturn_ident:
				207	CC = R0 < R1 (IU); /* If X < Y, always return 0 */
				208	R2 = 0;
				209	IF CC JUMP .Ltrue_return_ident;
				210	R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
				211	CC = R1 == 0;
				212	IF CC JUMP .Ltrue_return_ident;
				213	R2 = -R2; /* R2 now 1 */
				214	CC = R0 == R1; /* X==Y => 1 */
				215	IF CC JUMP .Ltrue_return_ident;
				216	R2 = R0; /* X/1 => X */
				217	/*FALLTHRU*/
				218
				219	.Ltrue_return_ident:
				220	R0 = R2;
				221	.Lreturn_r0:
				222	RTS;
				223
				224	.Lpower_of_two:
				225	/* Y has a single bit set, which means it's a power of two.
				226	** That means we can perform the division just by shifting
				227	** X to the right the appropriate number of bits
				228	*/
				229
				230	/* signbits returns the number of sign bits, minus one.
				231	** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
				232	** to shift right n-signbits spaces. It also means 0x80000000
				233	** is a special case, because that also gives a signbits of 0
				234	*/
				235
				236	R2 = R0 >> 31;
				237	CC = R1 < 0;
				238	IF CC JUMP .Ltrue_return_ident;
				239
				240	R1.l = SIGNBITS R1;
				241	R1 = R1.L (Z);
				242	R1 += -30;
				243	R0 = LSHIFT R0 by R1.L;
				244	RTS;
				245
				246	/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
				247	Two scaling operations are required to use the divide primitives with a
				248	divisor > 0x7FFF.
				249	Firstly (as in method 1) we need to shift the dividend 1 to the left for
				250	integer division.
				251	Secondly we need to shift both the divisor and dividend 1 to the right so
				252	both are in range for the primitives.
				253	The left/right shift of the dividend does nothing so we can skip it.
				254	*/
				255	.Lshift_and_correct:
				256	R2 = R0;
				257	// R3 is already R1 >> 1
				258	CC=!CC;
				259	AQ = CC; /* Clear AQ: we arrived with CC = 1, inverted to 0 just above */
				260	DIVQ(R2, R3); // 1
				261	DIVQ(R2, R3); // 2
				262	DIVQ(R2, R3); // 3
				263	DIVQ(R2, R3); // 4
				264	DIVQ(R2, R3); // 5
				265	DIVQ(R2, R3); // 6
				266	DIVQ(R2, R3); // 7
				267	DIVQ(R2, R3); // 8
				268	DIVQ(R2, R3); // 9
				269	DIVQ(R2, R3); // 10
				270	DIVQ(R2, R3); // 11
				271	DIVQ(R2, R3); // 12
				272	DIVQ(R2, R3); // 13
				273	DIVQ(R2, R3); // 14
				274	DIVQ(R2, R3); // 15
				275	DIVQ(R2, R3); // 16
				276
				277	/* According to the Instruction Set Reference:
				278	To divide by a divisor > 0x7FFF,
				279	1. prescale and perform divide to obtain quotient (Q) (done above),
				280	2. multiply quotient by unscaled divisor (result M)
				281	3. subtract the product from the dividend to get an error (E = X - M)
				282	4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
				283	*/
				284	R3 = R2.L (Z); /* Q = X' / Y' */
				285	R2 = R3; /* Preserve Q */
				286	R2 *= R1; /* M = Q * Y */
				287	R2 = R0 - R2; /* E = X - M */
				288	R0 = R3; /* Copy Q into result reg */
				289
				290	/* Correction: If result of the multiply is negative, we overflowed
				291	and need to correct the result by subtracting 1 from the result.*/
				292	R3 = 0xFFFF (Z);
				293	R2 = R2 >> 16; /* E >> 16 */
				294	CC = R2 == R3;
				295	R3 = 1 ;
				296	R1 = R0 - R3;
				297	IF CC R0 = R1;
				298	RTS;
Mike Frysinger	51be24c	2007-06-11 15:31:30 +0800	[diff] [blame]	299
				300	ENDPROC(___udivsi3)