blob: 6dc06487f3c3e95359b2559163f24d8a8ca635cf [file] [log] [blame]
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file. (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING. If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA. */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38
39
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit shift-and-subtract division.
 *
 *   \dividend	in:  value to divide; overwritten
 *   \divisor	in:  value to divide by; overwritten
 *   \result	out: quotient
 *   \curbit	scratch: quotient bit that matches the current divisor shift
 *
 * The entry points in this file only expand this macro after they have
 * dealt with a zero divisor, a divisor of 1, a power-of-two divisor and
 * the dividend <= divisor case.
 *
 * The value left in \dividend must not be taken as the remainder: the
 * unrolled steps keep subtracting "\divisor, lsr #n" even once
 * "\curbit, lsr #n" has dropped to zero.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend using the difference of their leading zero counts.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ shift distance
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ quotient bit for that shift
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times: each step subtracts the
	@ shifted divisor when it fits and records the matching quotient bit.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4	@ skipped once curbit hits zero
	bne	1b

.endm
104
105
/*
 * ARM_DIV2_ORDER - compute the bit position of a power-of-two divisor.
 *
 *   \divisor	in:  divisor; overwritten on the pre-ARMv5 path
 *   \order	out: index of the set bit, usable as a right shift count
 *
 * The entry points in this file only expand this macro after checking
 * that the divisor has a single bit set.  The last step of the pre-ARMv5
 * path depends on that: it expects the reduced divisor to be 1, 2, 4 or 8.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zeros

#else

	@ Binary search: fold the divisor down by 16, 8 and 4 bits,
	@ accumulating the amount shifted out in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ What is left is below 16: above 4 adds 3, otherwise
	@ 1, 2 or 4 adds 0, 1 or 2 (the value shifted right once).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
135
136
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo by shift-and-subtract.
 *
 *   \dividend	in:  value to divide; out: remainder
 *   \divisor	in:  value to divide by; overwritten
 *   \order	scratch: number of bit positions the divisor was shifted up
 *   \spare	scratch: only written on the ARMv5+ path
 *
 * The entry points in this file only expand this macro after they have
 * dealt with a zero divisor, a divisor of 1, a power-of-two divisor and
 * the dividend <= divisor case.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend; order is the shift distance.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	@ A divisor shifted up by order positions needs order + 1
	@ compare/subtract steps, so a batch of 4 fits when order >= 3.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ anything left to reduce?
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4		@ yes, room for another batch?
	bge	1b

	@ Finished if no partial batch remains or the dividend reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left
	@ (order is -3, -2 or -1 respectively).
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
206
207
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 * Both names label the same code.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = quotient
 * May overwrite r1, r2, r3 and the condition flags.
 * A zero divisor is handed to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)

	subs	r2, r1, #1		@ r2 = divisor - 1
	moveq	pc, lr			@ divisor is 1: r0 already holds the quotient
	bcc	Ldiv0			@ borrow: divisor is 0
	cmp	r0, r1
	bls	11f			@ dividend <= divisor
	tst	r1, r2
	beq	12f			@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

11:	moveq	r0, #1			@ dividend == divisor
	movne	r0, #0			@ dividend < divisor
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2		@ divide by shifting
	mov	pc, lr

ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = remainder
 * May overwrite r1, r2, r3 and the condition flags.
 * A zero divisor is handed to Ldiv0.
 *
 * The quick cases are resolved by one chain of conditional instructions:
 * the flags from each compare decide whether the next one runs at all.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow: divisor is 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor is 1, or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: keep the low bits
	movls	pc, lr				@ done unless dividend > divisor
						@ with a non power of 2 divisor

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 * Both names label the same code.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = quotient
 * May overwrite r1, r2, r3, ip and the condition flags.
 * A zero divisor is handed to Ldiv0.
 *
 * The operands are made positive, divided as unsigned values, and the
 * quotient is negated when bit 31 of (dividend ^ divisor) is set.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ magnitude of dividend <= divisor
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the saved sign
	mov	pc, lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0			@ ip ^ r0 has the divisor sign: negate for -1
	mov	pc, lr

11:	movlo	r0, #0				@ smaller magnitude: quotient 0
	moveq	r0, ip, asr #31			@ equal magnitude: 0 or -1 from the sign
	orreq	r0, r0, #1			@ ... turned into 1 or -1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ divide the magnitude by shifting
	rsbmi	r0, r0, #0			@ apply the saved sign
	mov	pc, lr

ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3 - signed 32-bit modulo.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = remainder, negated when the dividend was negative
 * May overwrite r1, r2, r3, ip and the condition flags.
 * A zero divisor is handed to Ldiv0.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor is 1, or equal magnitudes
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: keep the low bits
	bls	10f				@ quick cases are done

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ give the result the dividend sign
	mov	pc, lr

ENDPROC(__modsi3)
314
#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod - unsigned 32-bit division returning both results.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = quotient, r1 = remainder
 * Overwrites r2 and r3; ip and lr are saved and restored.
 *
 * The remainder is derived from the quotient:
 * dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)

	stmfd	sp!, {r0, r1, ip, lr}	@ keep both operands across the call
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ quotient * divisor
	sub	r1, r1, r3		@ remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod - signed 32-bit division returning both results.
 *
 * In:	r0 = dividend, r1 = divisor
 * Out:	r0 = quotient, r1 = remainder
 * Overwrites r2 and r3; ip and lr are saved and restored.
 *
 * The remainder is derived from the quotient:
 * dividend - quotient * divisor.
 */
ENTRY(__aeabi_idivmod)

	stmfd	sp!, {r0, r1, ip, lr}	@ keep both operands across the call
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ quotient * divisor
	sub	r1, r1, r3		@ remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0 - common division-by-zero path.
 *
 * The division entry points branch here (they do not call), so lr still
 * holds the return address of their caller.  __div0 is called and, if it
 * returns, 0 is handed back in r0 as the result.
 */
Ldiv0:

	str	lr, [sp, #-8]!		@ push lr; sp moves by 8, not 4
					@ (presumably to keep sp 8-byte
					@ aligned for the call - confirm)
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
