/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM architecture. A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

// Frame handling for the software-division path. That path uses lr as a
// scratch register (it holds the constant 1), so the return address has to
// live on the stack for the duration of the routine.
//
// ESTABLISH_FRAME pushes r7 and lr (two words) and then points r7 at the new
// top of stack.
// NOTE(review): r7 is presumably the platform frame pointer - confirm.
#define ESTABLISH_FRAME \
    push   {r7, lr}    ;\
    mov     r7,     sp
// CLEAR_FRAME_AND_RETURN restores r7 and returns by popping the saved
// return address directly into pc.
#define CLEAR_FRAME_AND_RETURN \
    pop    {r7, pc}

// Register roles in the software-division path:
//   a   (r0)  dividend on entry, afterwards the running remainder
//   b   (r1)  divisor; read but never written
//   r   (r2)  result of the trial subtraction; first holds clz of the dividend
//   i   (r3)  shift amount / index of the quotient bit being tested;
//             first holds clz of the divisor
//   q   (ip)  quotient under construction
//   one (lr)  the constant 1, shifted into place to set quotient bits
//
// These are plain preprocessor macros, so every bare occurrence of one of
// these names later in the file is rewritten to the register on the right.
#define a   r0
#define b   r1
#define r   r2
#define i   r3
#define q   ip
#define one lr

.syntax unified
.align 3
//-----------------------------------------------------------------------
// unsigned __udivsi3(unsigned dividend, unsigned divisor)
//
// Also exported as __aeabi_uidiv: APCS and AAPCS agree on how 32-bit
// arguments are passed, so one routine can serve both names.
//
// In:     r0 = dividend, r1 = divisor
// Out:    r0 = dividend / divisor, or 0 when the divisor is zero
// Writes: r0 and the condition flags on every path. The software path
//         additionally writes r2, r3, ip and lr (the return address is
//         kept on the stack meanwhile) and saves and restores r7.
//-----------------------------------------------------------------------
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_EXT_IDIV__
    // Hardware divide is available. A zero divisor is filtered out first and
    // answered with 0, the same result the software path below produces.
    tst     r1,     r1
    beq     LOCAL_LABEL(divzero)
    udiv    r0,     r0, r1
    bx      lr
LOCAL_LABEL(divzero):
    mov     r0,     #0
    bx      lr
#else
    // We use a simple digit by digit algorithm; before we get into the actual
    // divide loop, we must calculate the left-shift amount necessary to align
    // the MSB of the divisor with that of the dividend (if this shift is
    // negative, then the result is zero, and we early out). We also conjure a
    // bit mask of 1 to use in constructing the quotient, and initialize the
    // quotient to zero.
    //
    // The flags set by tst are consumed by the beq three instructions later;
    // the clz and mov in between do not update the flags.
    ESTABLISH_FRAME
    clz     r2,     a
    tst     b,      b                   // detect divide-by-zero
    clz     r3,     b
    mov     q,      #0
    beq     LOCAL_LABEL(return)         // return 0 if b is zero
    mov     one,    #1
    subs    i,      r3, r2              // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)         // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
    // This loop basically implements the following:
    //
    // do {
    //     if (a >= b << i) {
    //         a -= b << i;
    //         q |= 1 << i;
    //         if (a == 0) break;
    //     }
    // } while (--i > 0)
    //
    // Note that this does not perform the final iteration (i == 0); by doing
    // it this way, we can merge the two branches which is a substantial win
    // for such a tight loop on current ARM architectures.
    //
    // How the merged branch works: subsne only executes when the trial
    // subtraction was non-zero. If it was zero, Z stays set and bhi falls
    // through (the break). Otherwise bhi is taken only when the decrement
    // neither borrowed nor reached zero, i.e. while the new i is positive.
    // If the loop is entered with i == 0 the body already performs step 0,
    // and the final step below repeats the comparison without changing q.
    subs    r,      a,  b, lsl i        // r = a - (b << i)
    orrhs   q,      q,one, lsl i        // no borrow: set quotient bit i
    movhs   a,      r                   // no borrow: commit the subtraction
    subsne  i,      i, #1
    bhi     LOCAL_LABEL(mainLoop)

    // Do the final test subtraction and update of quotient (i == 0), as it is
    // not performed in the main loop.
    subs    r,      a,  b
    orrhs   q,      #1

LOCAL_LABEL(return):
    // Move the quotient to r0 and return.
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN
#endif