blob: 28979fee4bdcc85ca80f19f4d6016c54685db3b8 [file] [log] [blame]
/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 * The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM architecture. A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

// Prologue/epilogue used by the software-divide path.
//
// ESTABLISH_FRAME pushes r7 and lr, then points r7 at the new top of stack
// (presumably acting as the frame pointer, per the macro's name).  Saving lr
// is what allows the divide loop to reuse it as a scratch register (`one`).
// CLEAR_FRAME_AND_RETURN restores r7 and returns by popping the saved lr
// straight into pc.
#define ESTABLISH_FRAME \
    push   {r7, lr}    ;\
    mov     r7,     sp
#define CLEAR_FRAME_AND_RETURN \
    pop    {r7, pc}

// Symbolic register names used by the software-divide path.
//
//   a   (r0) - dividend on entry; running remainder inside the loop
//   b   (r1) - divisor
//   r   (r2) - result of the trial subtraction
//   i   (r3) - current shift amount / bit position
//   q   (ip) - quotient being accumulated
//   one (lr) - constant 1, shifted to form the quotient bit
//
// NOTE: these are plain preprocessor macros, so the bare tokens `a`, `b`,
// `r`, `i`, `q` and `one` are rewritten everywhere in the code that follows.
// In particular an unconditional branch spelled `b <label>` would expand to
// `r1 <label>`; only conditional forms (beq, blt, bhi, ...) and bx are safe.
#define a r0
#define b r1
#define r r2
#define i r3
#define q ip
#define one lr

.syntax unified
.align 3
// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

// __udivsi3 - 32-bit unsigned integer divide.
//
//   In:       r0 = dividend, r1 = divisor
//   Out:      r0 = dividend / divisor, or 0 when the divisor is zero
//   Clobbers: condition flags.  The software path additionally overwrites
//             r2, r3 and ip, and uses lr as scratch after saving it (together
//             with r7) on the stack; both are restored on return.
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_7S__
// Hardware path: use the udiv instruction, but test for a zero divisor first
// so that this path returns 0 for it, exactly like the software path below.
    tst     r1, r1
    beq     LOCAL_LABEL(divzero)
    udiv    r0, r0, r1
    bx      lr
LOCAL_LABEL(divzero):
    mov     r0, #0
    bx      lr
#else
// We use a simple digit by digit algorithm; before we get into the actual
// divide loop, we must calculate the left-shift amount necessary to align
// the MSB of the divisor with that of the dividend (If this shift is
// negative, then the result is zero, and we early out). We also conjure a
// bit mask of 1 to use in constructing the quotient, and initialize the
// quotient to zero.
    ESTABLISH_FRAME
    clz     r2, a                   // r2 = leading zeros of the dividend
    tst     b, b                    // detect divide-by-zero
    clz     r3, b                   // r3 = leading zeros of the divisor
    mov     q, #0                   // clz/mov leave the flags from tst intact
    beq     LOCAL_LABEL(return)     // return 0 if b is zero.
    mov     one, #1
    subs    i, r3, r2               // i = clz(b) - clz(a); i is r3, so this
                                    // overwrites the divisor's clz count
    blt     LOCAL_LABEL(return)     // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
// This loop basically implements the following:
//
//   do {
//       if (a >= b << i) {
//           a -= b << i;
//           q |= 1 << i;
//           if (a == 0) break;
//       }
//   } while (--i > 0)
//
// Note that this does not perform the final iteration (i == 0); by doing it
// this way, we can merge the two branches which is a substantial win for
// such a tight loop on current ARM architectures.
//
// If the loop is entered with i == 0 the body still runs once at shift 0;
// the final step below then repeats the same comparison, which is harmless
// because by then a < b (or a == 0) and no further quotient bit is set.
    subs    r, a, b, lsl i          // r = a - (b << i); C = no borrow, Z = (r == 0)
    orrhs   q, q, one, lsl i        // if (a >= b << i) q |= 1 << i
    movhs   a, r                    // if (a >= b << i) a  = r
    subsne  i, i, #1                // if (r != 0) --i, replacing the flags
    bhi     LOCAL_LABEL(mainLoop)   // taken only if the decrement ran and left
                                    // i > 0; Z still set (r == 0) falls through

// Do the final test subtraction and update of quotient (i == 0), as it is
// not performed in the main loop.
    subs    r, a, b                 // only the carry flag is consumed below
    orrhs   q, #1

LOCAL_LABEL(return):
// Move the quotient to r0 and return.
    mov     r0, q
    CLEAR_FRAME_AND_RETURN
#endif