blob: d164a751d0895414ea89463a57246b90c201429a [file] [log] [blame]
Stephen Canon5abb5c12011-03-18 16:35:02 +00001/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===//
9 *
10 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
11 * modulus) function for the ARM architecture. A naive digit-by-digit
12 * computation is employed for simplicity.
13 *
14 *===----------------------------------------------------------------------===*/
15
16#include "../assembly.h"
17
18#define ESTABLISH_FRAME \
19 push {r4, r7, lr} ;\
20 add r7, sp, #4
21#define CLEAR_FRAME_AND_RETURN \
22 pop {r4, r7, pc}
23
24#define a r0
25#define b r1
26#define i r3
27#define r r4
28#define q ip
29#define one lr
30
31.syntax unified
32.align 3
33DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
34// We use a simple digit by digit algorithm; before we get into the actual
35// divide loop, we must calculate the left-shift amount necessary to align
36// the MSB of the divisor with that of the dividend (If this shift is
37// negative, then the result is zero, and we early out). We also conjure a
38// bit mask of 1 to use in constructing the quotient, and initialize the
39// quotient to zero.
40 ESTABLISH_FRAME
41 clz r4, a
42 tst b, b // detect divide-by-zero
43 clz r3, b
44 mov q, #0
Anton Korobeynikovbdadd872011-04-19 17:50:42 +000045 beq LOCAL_LABEL(return) // return 0 if b is zero.
Stephen Canon5abb5c12011-03-18 16:35:02 +000046 mov one, #1
47 subs i, r3, r4
Anton Korobeynikovbdadd872011-04-19 17:50:42 +000048 blt LOCAL_LABEL(return) // return 0 if MSB(a) < MSB(b)
Stephen Canon5abb5c12011-03-18 16:35:02 +000049
Anton Korobeynikovbdadd872011-04-19 17:50:42 +000050LOCAL_LABEL(mainLoop):
Stephen Canon5abb5c12011-03-18 16:35:02 +000051// This loop basically implements the following:
52//
53// do {
54// if (a >= b << i) {
55// a -= b << i;
56// q |= 1 << i;
57// if (a == 0) break;
58// }
59// } while (--i)
60//
61// Note that this does not perform the final iteration (i == 0); by doing it
62// this way, we can merge the two branches which is a substantial win for
63// such a tight loop on current ARM architectures.
64 subs r, a, b, lsl i
65 orrhs q, q,one, lsl i
66 movhs a, r
67 subsne i, i, #1
Anton Korobeynikovbdadd872011-04-19 17:50:42 +000068 bhi LOCAL_LABEL(mainLoop)
Stephen Canon5abb5c12011-03-18 16:35:02 +000069
70// Do the final test subtraction and update of quotient (i == 0), as it is
71// not performed in the main loop.
72 subs r, a, b
73 orrhs q, #1
74 movhs a, r
75
Anton Korobeynikovbdadd872011-04-19 17:50:42 +000076LOCAL_LABEL(return):
Stephen Canon5abb5c12011-03-18 16:35:02 +000077// Store the remainder, and move the quotient to r0, then return.
78 str a, [r2]
79 mov r0, q
80 CLEAR_FRAME_AND_RETURN