blob: 748a6a2e8c1719d72719d52b00949316a885d5e3 [file] [log] [blame]
Bryan Wu1394f032007-05-06 14:50:22 -07001/*
Robin Getz96f10502009-09-24 14:11:24 +00002 * Copyright 2004-2009 Analog Devices Inc.
Bryan Wu1394f032007-05-06 14:50:22 -07003 *
Sonic Zhangde450832012-05-17 14:45:27 +08004 * Licensed under the Clear BSD license or the GPL-2 (or later)
Bryan Wu1394f032007-05-06 14:50:22 -07005 */
6
7#include <linux/linkage.h>
8
9#define CARRY AC0
10
11#ifdef CONFIG_ARITHMETIC_OPS_L1
12.section .l1.text
13#else
14.text
15#endif
16
17
18ENTRY(___udivsi3)
19
20 CC = R0 < R1 (IU); /* If X < Y, always return 0 */
21 IF CC JUMP .Lreturn_ident;
22
23 R2 = R1 << 16;
24 CC = R2 <= R0 (IU);
25 IF CC JUMP .Lidents;
26
27 R2 = R0 >> 31; /* if X is a 31-bit number */
28 R3 = R1 >> 15; /* and Y is a 15-bit number */
29 R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
30 CC = R2;
31 IF CC JUMP .Ly_16bit;
32
33/* METHOD 1: FAST DIVQ
34 We know we have a 31-bit dividend, and 15-bit divisor so we can use the
35 simple divq approach (first setting AQ to 0 - implying unsigned division,
36 then 16 DIVQ's).
37*/
38
39 AQ = CC; /* Clear AQ (CC==0) */
40
41/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
42 we need to shift the dividend one bit to the left.
43 We have already checked that we have a 31-bit number so we are safe to do
44 that.
45*/
46 R0 <<= 1;
47 DIVQ(R0, R1); // 1
48 DIVQ(R0, R1); // 2
49 DIVQ(R0, R1); // 3
50 DIVQ(R0, R1); // 4
51 DIVQ(R0, R1); // 5
52 DIVQ(R0, R1); // 6
53 DIVQ(R0, R1); // 7
54 DIVQ(R0, R1); // 8
55 DIVQ(R0, R1); // 9
56 DIVQ(R0, R1); // 10
57 DIVQ(R0, R1); // 11
58 DIVQ(R0, R1); // 12
59 DIVQ(R0, R1); // 13
60 DIVQ(R0, R1); // 14
61 DIVQ(R0, R1); // 15
62 DIVQ(R0, R1); // 16
63 R0 = R0.L (Z);
64 RTS;
65
66.Ly_16bit:
67 /* We know that the upper 17 bits of Y might have bits set,
68 ** or that the sign bit of X might have a bit. If Y is a
69 ** 16-bit number, but not bigger, then we can use the builtins
70 ** with a post-divide correction.
71 ** R3 currently holds Y>>15, which means R3's LSB is the
72 ** bit we're interested in.
73 */
74
75 /* According to the ISR, to use the Divide primitives for
76 ** unsigned integer divide, the useable range is 31 bits
77 */
78 CC = ! BITTST(R0, 31);
79
80 /* IF condition is true we can scale our inputs and use the divide primitives,
81 ** with some post-adjustment
82 */
83 R3 += -1; /* if so, Y is 0x00008nnn */
84 CC &= AZ;
85
86 /* If condition is true we can scale our inputs and use the divide primitives,
87 ** with some post-adjustment
88 */
89 R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
90 R2 = R0 >> 16;
91
92 R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
93 CC &= CARRY;
94 IF CC JUMP .Lshift_and_correct;
95
96 /* Fall through to the identities */
97
98/* METHOD 2: identities and manual calculation
99 We are not able to use the divide primites, but may still catch some special
100 cases.
101*/
102.Lidents:
103 /* Test for common identities. Value to be returned is placed in R2. */
104 CC = R0 == 0; /* 0/Y => 0 */
105 IF CC JUMP .Lreturn_r0;
106 CC = R0 == R1; /* X==Y => 1 */
107 IF CC JUMP .Lreturn_ident;
108 CC = R1 == 1; /* X/1 => X */
109 IF CC JUMP .Lreturn_ident;
110
111 R2.L = ONES R1;
112 R2 = R2.L (Z);
113 CC = R2 == 1;
114 IF CC JUMP .Lpower_of_two;
115
116 [--SP] = (R7:5); /* Push registers R5-R7 */
117
118 /* Idents don't match. Go for the full operation. */
119
120
121 R6 = 2; /* assume we'll shift two */
122 R3 = 1;
123
124 P2 = R1;
125 /* If either R0 or R1 have sign set, */
126 /* divide them by two, and note it's */
127 /* been done. */
128 CC = R1 < 0;
129 R2 = R1 >> 1;
130 IF CC R1 = R2; /* Possibly-shifted R1 */
131 IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
132
133 P0 = 0;
134 R3 = -R1;
135 [--SP] = R3;
136 R2 = R0 >> 1;
137 R2 = R0 >> 1;
138 CC = R0 < 0;
139 IF CC P0 = R6; /* Number of values divided */
140 IF !CC R2 = R0; /* Shifted R0 */
141
142 /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
143
144 /* r2 holds Copy dividend */
145 R3 = 0; /* Clear partial remainder */
146 R7 = 0; /* Initialise quotient bit */
147
148 P1 = 32; /* Set loop counter */
149 LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
150.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
151 R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
152 R3 = R3 << 1 || R5 = [SP];
153 R3 = R3 | R6; /* Include any carry */
154 CC = R7 < 0; /* Check quotient(AQ) */
155 /* If AQ==0, we'll sub divisor */
156 IF CC R5 = R1; /* and if AQ==1, we'll add it. */
157 R3 = R3 + R5; /* Add/sub divsor to partial remainder */
158 R7 = R3 ^ R1; /* Generate next quotient bit */
159
160 R5 = R7 >> 31; /* Get AQ */
161 BITTGL(R5, 0); /* Invert it, to get what we'll shift */
162.Lulend: R2 = R2 + R5; /* and "shift" it in. */
163
164 CC = P0 == 0; /* Check how many inputs we shifted */
165 IF CC JUMP .Lno_mult; /* if none... */
166 R6 = R2 << 1;
167 CC = P0 == 1;
168 IF CC R2 = R6; /* if 1, Q = Q*2 */
169 IF !CC R1 = P2; /* if 2, restore stored divisor */
170
171 R3 = R2; /* Copy of R2 */
172 R3 *= R1; /* Q * divisor */
173 R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
174 CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
175 R6 = CC; /* if yes, R6 = 1 */
176 R2 = R2 + R6; /* if yes, add one to quotient(Q) */
177.Lno_mult:
178 SP += 4;
179 (R7:5) = [SP++]; /* Pop registers R5-R7 */
180 R0 = R2; /* Store quotient */
181 RTS;
182
183.Lreturn_ident:
184 CC = R0 < R1 (IU); /* If X < Y, always return 0 */
185 R2 = 0;
186 IF CC JUMP .Ltrue_return_ident;
187 R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
188 CC = R1 == 0;
189 IF CC JUMP .Ltrue_return_ident;
190 R2 = -R2; /* R2 now 1 */
191 CC = R0 == R1; /* X==Y => 1 */
192 IF CC JUMP .Ltrue_return_ident;
193 R2 = R0; /* X/1 => X */
194 /*FALLTHRU*/
195
196.Ltrue_return_ident:
197 R0 = R2;
198.Lreturn_r0:
199 RTS;
200
201.Lpower_of_two:
202 /* Y has a single bit set, which means it's a power of two.
203 ** That means we can perform the division just by shifting
204 ** X to the right the appropriate number of bits
205 */
206
207 /* signbits returns the number of sign bits, minus one.
208 ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
209 ** to shift right n-signbits spaces. It also means 0x80000000
210 ** is a special case, because that *also* gives a signbits of 0
211 */
212
213 R2 = R0 >> 31;
214 CC = R1 < 0;
215 IF CC JUMP .Ltrue_return_ident;
216
217 R1.l = SIGNBITS R1;
218 R1 = R1.L (Z);
219 R1 += -30;
220 R0 = LSHIFT R0 by R1.L;
221 RTS;
222
223/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
224 Two scaling operations are required to use the divide primitives with a
225 divisor > 0x7FFFF.
226 Firstly (as in method 1) we need to shift the dividend 1 to the left for
227 integer division.
228 Secondly we need to shift both the divisor and dividend 1 to the right so
229 both are in range for the primitives.
230 The left/right shift of the dividend does nothing so we can skip it.
231*/
232.Lshift_and_correct:
233 R2 = R0;
234 // R3 is already R1 >> 1
235 CC=!CC;
236 AQ = CC; /* Clear AQ, got here with CC = 0 */
237 DIVQ(R2, R3); // 1
238 DIVQ(R2, R3); // 2
239 DIVQ(R2, R3); // 3
240 DIVQ(R2, R3); // 4
241 DIVQ(R2, R3); // 5
242 DIVQ(R2, R3); // 6
243 DIVQ(R2, R3); // 7
244 DIVQ(R2, R3); // 8
245 DIVQ(R2, R3); // 9
246 DIVQ(R2, R3); // 10
247 DIVQ(R2, R3); // 11
248 DIVQ(R2, R3); // 12
249 DIVQ(R2, R3); // 13
250 DIVQ(R2, R3); // 14
251 DIVQ(R2, R3); // 15
252 DIVQ(R2, R3); // 16
253
254 /* According to the Instruction Set Reference:
255 To divide by a divisor > 0x7FFF,
256 1. prescale and perform divide to obtain quotient (Q) (done above),
257 2. multiply quotient by unscaled divisor (result M)
258 3. subtract the product from the divident to get an error (E = X - M)
259 4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
260 */
261 R3 = R2.L (Z); /* Q = X' / Y' */
262 R2 = R3; /* Preserve Q */
263 R2 *= R1; /* M = Q * Y */
264 R2 = R0 - R2; /* E = X - M */
265 R0 = R3; /* Copy Q into result reg */
266
267/* Correction: If result of the multiply is negative, we overflowed
268 and need to correct the result by subtracting 1 from the result.*/
269 R3 = 0xFFFF (Z);
270 R2 = R2 >> 16; /* E >> 16 */
271 CC = R2 == R3;
272 R3 = 1 ;
273 R1 = R0 - R3;
274 IF CC R0 = R1;
275 RTS;
Mike Frysinger51be24c2007-06-11 15:31:30 +0800276
277ENDPROC(___udivsi3)