blob: 4e492f4b3f0e48cf1ca14229fc318627a6edc8c2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@cam.org>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file. (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING. If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA. */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38
39
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Shift-and-subtract core of the unsigned 32-bit division.
 *
 *   dividend  in:  value to divide          out: clobbered
 *   divisor   in:  value to divide by       out: clobbered
 *   result    out: quotient
 *   curbit    scratch: quotient bit matching the current divisor position
 *
 * Both call sites in this file branch away before expanding the macro
 * when the divisor is 0 or 1, when dividend <= divisor, or when the
 * divisor is a power of two, so the body only sees dividend > divisor > 1.
 * Only the result register is read by the callers afterwards.
 * Condition flags are modified.  Uses the numeric local label 1.
 */
40.macro ARM_DIV_BODY dividend, divisor, result, curbit
41
42#if __LINUX_ARM_ARCH__ >= 5
43
/*
 * With CLZ available: shift = clz(divisor) - clz(dividend).  Shifting the
 * divisor left by that amount lines its top set bit up with the top set
 * bit of the dividend; curbit starts at the matching quotient bit
 * (1 << shift).  The result register doubles as the temporary holding
 * the shift count until it is cleared for use as the quotient.
 */
44 clz \curbit, \divisor
45 clz \result, \dividend
46 sub \result, \curbit, \result
47 mov \curbit, #1
48 mov \divisor, \divisor, lsl \result
49 mov \curbit, \curbit, lsl \result
50 mov \result, #0
51
52#else
53
54 @ Initially shift the divisor left 3 bits if possible,
55 @ set curbit accordingly. This allows for curbit to be located
56 @ at the left end of each 4 bit nibbles in the division loop
57 @ to save one loop in most cases.
/* The tst checks that the top three bits are clear, i.e. lsl #3 loses nothing. */
58 tst \divisor, #0xe0000000
59 moveq \divisor, \divisor, lsl #3
60 moveq \curbit, #8
61 movne \curbit, #1
62
63 @ Unless the divisor is very big, shift it up in multiples of
64 @ four bits, since this is the amount of unwinding in the main
65 @ division loop. Continue shifting until the divisor is
66 @ larger than the dividend.
/*
 * The second compare only runs when the first one yielded "lower", so
 * the loop continues only while divisor < 0x10000000 and divisor < dividend.
 */
671: cmp \divisor, #0x10000000
68 cmplo \divisor, \dividend
69 movlo \divisor, \divisor, lsl #4
70 movlo \curbit, \curbit, lsl #4
71 blo 1b
72
73 @ For very big divisors, we must shift it a bit at a time, or
74 @ we will be in danger of overflowing.
/* Same scheme as above, one bit per pass, bounded by 0x80000000. */
751: cmp \divisor, #0x80000000
76 cmplo \divisor, \dividend
77 movlo \divisor, \divisor, lsl #1
78 movlo \curbit, \curbit, lsl #1
79 blo 1b
80
81 mov \result, #0
82
83#endif
84
85 @ Division loop
/*
 * Each pass tries four positions: divisor, divisor >> 1, divisor >> 2 and
 * divisor >> 3.  Whenever the dividend is higher or same, the shifted
 * divisor is subtracted and the equally shifted curbit is ORed into the
 * result.
 */
861: cmp \dividend, \divisor
87 subhs \dividend, \dividend, \divisor
88 orrhs \result, \result, \curbit
89 cmp \dividend, \divisor, lsr #1
90 subhs \dividend, \dividend, \divisor, lsr #1
91 orrhs \result, \result, \curbit, lsr #1
92 cmp \dividend, \divisor, lsr #2
93 subhs \dividend, \dividend, \divisor, lsr #2
94 orrhs \result, \result, \curbit, lsr #2
95 cmp \dividend, \divisor, lsr #3
96 subhs \dividend, \dividend, \divisor, lsr #3
97 orrhs \result, \result, \curbit, lsr #3
/*
 * Loop exit: if the dividend is zero the cmp sets Z and the three
 * ne-conditional instructions are skipped.  Otherwise movnes shifts
 * curbit right by 4 and updates Z, so the loop also ends once curbit
 * has been shifted out to zero; if not, the divisor follows curbit
 * down by 4 bits and the loop repeats.
 */
98 cmp \dividend, #0 @ Early termination?
99 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
100 movne \divisor, \divisor, lsr #4
101 bne 1b
102
102.endm
104
105
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Computes in order the bit index of the divisor, for use as a right-shift
 * count by the callers.  Both call sites in this file reach this macro only
 * after "tst divisor, divisor - 1" gave zero, i.e. with a power-of-two
 * divisor.
 *
 *   divisor   in:  power of two     out: unchanged on ARMv5+, clobbered otherwise
 *   order     out: log2(divisor)
 *
 * Condition flags are modified on the pre-ARMv5 path.
 */
106.macro ARM_DIV2_ORDER divisor, order
107
108#if __LINUX_ARM_ARCH__ >= 5
109
/* order = 31 - clz(divisor) */
110 clz \order, \divisor
111 rsb \order, \order, #31
112
113#else
114
/*
 * Without CLZ: binary search.  Each step checks whether the divisor is at
 * least 1 << 16, 1 << 8, 1 << 4; if so the divisor is shifted down by that
 * amount and the amount is accumulated in order.
 */
115 cmp \divisor, #(1 << 16)
116 movhs \divisor, \divisor, lsr #16
117 movhs \order, #16
118 movlo \order, #0
119
120 cmp \divisor, #(1 << 8)
121 movhs \divisor, \divisor, lsr #8
122 addhs \order, \order, #8
123
124 cmp \divisor, #(1 << 4)
125 movhs \divisor, \divisor, lsr #4
126 addhs \order, \order, #4
127
/*
 * The divisor is now below 16.  Above 4 (for a power of two: 8) adds 3;
 * otherwise divisor >> 1 is added, which maps 1, 2, 4 to 0, 1, 2.
 */
128 cmp \divisor, #(1 << 2)
129 addhi \order, \order, #3
130 addls \order, \order, \divisor, lsr #1
131
132#endif
133
134.endm
135
136
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Shift-and-subtract core of the unsigned 32-bit modulo.
 *
 *   dividend  in:  value to divide     out: remainder
 *   divisor   in:  value to divide by  out: clobbered
 *   order     scratch: number of bit positions the divisor was shifted left,
 *             then used as a countdown of the compare/subtract steps left
 *   spare     scratch, written only on the ARMv5+ path
 *
 * Both call sites in this file return early when the divisor is 0 or 1,
 * when dividend <= divisor, or when the divisor is a power of two, so the
 * body only sees dividend > divisor > 1.
 * Condition flags are modified.  Uses the numeric local labels 1 to 5.
 */
137.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139#if __LINUX_ARM_ARCH__ >= 5
140
/*
 * order = clz(divisor) - clz(dividend); shifting the divisor left by order
 * lines its top set bit up with the top set bit of the dividend.
 */
141 clz \order, \divisor
142 clz \spare, \dividend
143 sub \order, \order, \spare
144 mov \divisor, \divisor, lsl \order
145
146#else
147
148 mov \order, #0
149
150 @ Unless the divisor is very big, shift it up in multiples of
151 @ four bits, since this is the amount of unwinding in the main
152 @ division loop. Continue shifting until the divisor is
153 @ larger than the dividend.
/*
 * The second compare only runs when the first one yielded "lower", so
 * the loop continues only while divisor < 0x10000000 and divisor < dividend.
 * order counts the total shift applied.
 */
1541: cmp \divisor, #0x10000000
155 cmplo \divisor, \dividend
156 movlo \divisor, \divisor, lsl #4
157 addlo \order, \order, #4
158 blo 1b
159
160 @ For very big divisors, we must shift it a bit at a time, or
161 @ we will be in danger of overflowing.
/* Same scheme as above, one bit per pass, bounded by 0x80000000. */
1621: cmp \divisor, #0x80000000
163 cmplo \divisor, \dividend
164 movlo \divisor, \divisor, lsl #1
165 addlo \order, \order, #1
166 blo 1b
167
168#endif
169
170 @ Perform all needed substractions to keep only the reminder.
171 @ Do comparisons in batch of 4 first.
/*
 * In other words: perform the subtractions needed to leave only the
 * remainder.  A shift of order needs order + 1 compare/subtract steps.
 * After subtracting 3, a negative value means fewer than four steps are
 * needed and control goes straight to the tail at label 2.
 */
172 subs \order, \order, #3 @ yes, 3 is intended here
173 blt 2f
174
/* Batch of four steps at divisor, divisor >> 1, divisor >> 2, divisor >> 3. */
1751: cmp \dividend, \divisor
176 subhs \dividend, \dividend, \divisor
177 cmp \dividend, \divisor, lsr #1
178 subhs \dividend, \dividend, \divisor, lsr #1
179 cmp \dividend, \divisor, lsr #2
180 subhs \dividend, \dividend, \divisor, lsr #2
181 cmp \dividend, \divisor, lsr #3
182 subhs \dividend, \dividend, \divisor, lsr #3
/*
 * The divisor always moves down by 4 bits (the mov does not touch the
 * flags).  order is decremented by 4 only when the signed compare found
 * dividend >= 1, and the loop repeats only while that updated order is
 * still >= 0.
 */
183 cmp \dividend, #1
184 mov \divisor, \divisor, lsr #4
185 subges \order, \order, #4
186 bge 1b
187
/*
 * Finished (label 5) when the low two bits of order are clear or when
 * the dividend is zero; otherwise fall into the tail.
 */
188 tst \order, #3
189 teqne \dividend, #0
190 beq 5f
191
192 @ Either 1, 2 or 3 comparison/substractions are left.
/*
 * cmn compares order with -2: below -2 does one step (label 4),
 * exactly -2 does two steps (label 3), anything above falls through
 * and does all three.
 */
1932: cmn \order, #2
194 blt 4f
195 beq 3f
196 cmp \dividend, \divisor
197 subhs \dividend, \dividend, \divisor
198 mov \divisor, \divisor, lsr #1
1993: cmp \dividend, \divisor
200 subhs \dividend, \dividend, \divisor
201 mov \divisor, \divisor, lsr #1
2024: cmp \dividend, \divisor
203 subhs \dividend, \dividend, \divisor
2045:
205.endm
206
207
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Also written: r1, r2, r3 and the condition flags.
 *
 * A zero divisor branches to Ldiv0.
 */
208ENTRY(__udivsi3)
Nicolas Pitreba95e4e2006-01-14 16:18:29 +0000209ENTRY(__aeabi_uidiv)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210
/*
 * r2 = divisor - 1.  Z set means divisor == 1: return the dividend as is.
 * Carry clear means the subtraction borrowed, i.e. divisor == 0.
 */
211 subs r2, r1, #1
212 moveq pc, lr
213 bcc Ldiv0
/* dividend <= divisor: the quotient is 1 or 0, handled at label 11. */
214 cmp r0, r1
215 bls 11f
/* divisor & (divisor - 1) == 0: power of two, handled by a shift at label 12. */
216 tst r1, r2
217 beq 12f
218
/* General case: quotient is produced in r2, r3 is the scratch bit mask. */
219 ARM_DIV_BODY r0, r1, r2, r3
220
221 mov r0, r2
222 mov pc, lr
223
/* Flags still come from "cmp r0, r1": equal gives 1, lower gives 0. */
22411: moveq r0, #1
225 movne r0, #0
226 mov pc, lr
227
/* r2 = log2(divisor); the quotient is the dividend shifted right by it. */
22812: ARM_DIV2_ORDER r1, r2
229
230 mov r0, r0, lsr r2
231 mov pc, lr
232
233
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder
 *   Also written: r1, r2, r3 and the condition flags.
 *
 * A zero divisor branches to Ldiv0.
 *
 * The special cases are resolved by one chain of conditional instructions
 * that all depend on the flags left by the previous one:
 *   - divisor == 1 (Z from subs) or dividend == divisor (Z from cmpne):
 *     r0 = 0; the following and with r2 leaves it 0; return.
 *   - dividend < divisor (carry clear from cmpne): r0 is returned unchanged.
 *   - dividend > divisor: tsthi checks divisor & (divisor - 1); for a power
 *     of two r0 = r0 & (divisor - 1) and return, otherwise fall through
 *     to the general case.
 */
234ENTRY(__umodsi3)
235
236 subs r2, r1, #1 @ compare divisor with 1
237 bcc Ldiv0
238 cmpne r0, r1 @ compare dividend with divisor
239 moveq r0, #0
240 tsthi r1, r2 @ see if divisor is power of 2
241 andeq r0, r0, r2
242 movls pc, lr
243
/* General case: r0 is reduced in place to the remainder; r2, r3 are scratch. */
244 ARM_MOD_BODY r0, r1, r2, r3
245
246 mov pc, lr
247
248
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Also written: r1, r2, r3, ip and the condition flags.
 *
 * The division is carried out on absolute values; ip = r0 eor r1 keeps the
 * sign of the result in its top bit.  A zero divisor branches to Ldiv0.
 */
249ENTRY(__divsi3)
Nicolas Pitreba95e4e2006-01-14 16:18:29 +0000250ENTRY(__aeabi_idiv)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
/* The eor does not update the flags, so beq/rsbmi still act on "cmp r1, #0". */
252 cmp r1, #0
253 eor ip, r0, r1 @ save the sign of the result.
254 beq Ldiv0
255 rsbmi r1, r1, #0 @ loops below use unsigned.
256 subs r2, r1, #1 @ division by 1 or -1 ?
257 beq 10f
/* r3 = absolute value of the dividend; r0 is left free for the quotient. */
258 movs r3, r0
259 rsbmi r3, r0, #0 @ positive dividend value
/* Magnitude of dividend <= magnitude of divisor: quotient is 0 or +/-1 (label 11). */
260 cmp r3, r1
261 bls 11f
262 tst r1, r2 @ divisor is power of 2 ?
263 beq 12f
264
/* General case: unsigned quotient in r0, r2 is the scratch bit mask. */
265 ARM_DIV_BODY r3, r1, r0, r2
266
/* Negate the quotient when the operands had different signs. */
267 cmp ip, #0
268 rsbmi r0, r0, #0
269 mov pc, lr
270
/*
 * Divisor was 1 or -1.  ip eor r0 equals the original divisor, so the N
 * flag set by teq is the sign of the original divisor: negate the dividend
 * when dividing by -1, return it unchanged when dividing by 1.
 */
27010: teq ip, r0 @ same sign ?
271 rsbmi r0, r0, #0
272 mov pc, lr
273
/*
 * Flags still come from "cmp r3, r1".  Lower: quotient 0.  Equal: ip asr #31
 * is 0 or -1 depending on the result sign; ORing in 1 yields 1 or -1.
 */
27411: movlo r0, #0
275 moveq r0, ip, asr #31
276 orreq r0, r0, #1
277 mov pc, lr
278
/*
 * Power-of-two divisor: shift the absolute dividend right by log2(divisor)
 * and negate when the result sign is negative.  The shift does not update
 * the flags, so rsbmi still acts on "cmp ip, #0".
 */
27912: ARM_DIV2_ORDER r1, r2
280
281 cmp ip, #0
282 mov r0, r3, lsr r2
283 rsbmi r0, r0, #0
284 mov pc, lr
286
287
/*
 * __modsi3: signed 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, negated when the dividend was negative
 *   Also written: r1, r2, r3, ip and the condition flags.
 *
 * The computation is carried out on absolute values; ip keeps a copy of
 * the original dividend for the final sign fix-up.  A zero divisor branches
 * to Ldiv0.
 */
288ENTRY(__modsi3)
289
290 cmp r1, #0
291 beq Ldiv0
292 rsbmi r1, r1, #0 @ loops below use unsigned.
293 movs ip, r0 @ preserve sign of dividend
294 rsbmi r0, r0, #0 @ if negative make positive
/*
 * Special cases, resolved by one flag chain:
 *   - divisor == 1 or dividend == divisor: r0 = 0.
 *   - dividend < divisor: r0 is kept as is.
 *   - dividend > divisor and divisor is a power of two:
 *     r0 = r0 & (divisor - 1).
 * bls skips the general case for all three.
 */
295 subs r2, r1, #1 @ compare divisor with 1
296 cmpne r0, r1 @ compare dividend with divisor
297 moveq r0, #0
298 tsthi r1, r2 @ see if divisor is power of 2
299 andeq r0, r0, r2
300 bls 10f
301
/* General case: r0 is reduced in place to the remainder; r2, r3 are scratch. */
302 ARM_MOD_BODY r0, r1, r2, r3
303
/* Give the remainder the sign of the original dividend. */
30410: cmp ip, #0
305 rsbmi r0, r0, #0
306 mov pc, lr
307
Nicolas Pitreba95e4e2006-01-14 16:18:29 +0000308#ifdef CONFIG_AEABI
309
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning quotient and remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient (as returned by __aeabi_uidiv)
 *         r1 = dividend - quotient * divisor
 *   Also written: r2, r3.
 *
 * The original operands are saved on the stack across the call and popped
 * back into r1 (dividend) and r2 (divisor).  ip is saved and restored as
 * well; presumably it is included to keep the stack adjustment a multiple
 * of 8 bytes -- TODO confirm.
 */
310ENTRY(__aeabi_uidivmod)
311
312 stmfd sp!, {r0, r1, ip, lr}
313 bl __aeabi_uidiv
314 ldmfd sp!, {r1, r2, ip, lr}
/* r3 = quotient * divisor; r1 = dividend - r3 */
315 mul r3, r0, r2
316 sub r1, r1, r3
317 mov pc, lr
318
/*
 * __aeabi_idivmod: signed 32-bit division returning quotient and remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient (as returned by __aeabi_idiv)
 *         r1 = dividend - quotient * divisor
 *   Also written: r2, r3.
 *
 * The original operands are saved on the stack across the call and popped
 * back into r1 (dividend) and r2 (divisor).  ip, which __aeabi_idiv
 * overwrites, is saved and restored along with them.
 */
319ENTRY(__aeabi_idivmod)
320
321 stmfd sp!, {r0, r1, ip, lr}
322 bl __aeabi_idiv
323 ldmfd sp!, {r1, r2, ip, lr}
/* r3 = quotient * divisor; r1 = dividend - r3 */
324 mul r3, r0, r2
325 sub r1, r1, r3
326 mov pc, lr
327
328#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329
/*
 * Ldiv0: common division-by-zero exit, reached by a branch from the
 * division and modulo entry points above, so lr still holds the return
 * address of their caller.
 *
 * Saves lr, calls __div0 (defined outside this file), then returns 0 in r0
 * to the original caller by loading the saved lr straight into pc.
 * The stack pointer moves by 8 bytes although only 4 are stored;
 * presumably this keeps sp 8-byte aligned across the call -- TODO confirm.
 */
330Ldiv0:
331
Nicolas Pitre499b2ea2006-01-14 16:18:09 +0000332 str lr, [sp, #-8]!
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 bl __div0
334 mov r0, #0 @ About as wrong as it could be.
Nicolas Pitre499b2ea2006-01-14 16:18:09 +0000335 ldr pc, [sp], #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
337