Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 1 | /*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// |
| 2 | * |
| 3 | * The LLVM Compiler Infrastructure |
| 4 | * |
| 5 | * This file is dual licensed under the MIT and the University of Illinois Open |
| 6 | * Source Licenses. See LICENSE.TXT for details. |
| 7 | * |
| 8 | *===----------------------------------------------------------------------===// |
| 9 | * |
| 10 | * This file implements the __udivmodsi4 (32-bit unsigned integer divide and |
| 11 | * modulus) function for the ARM architecture. A naive digit-by-digit |
| 12 | * computation is employed for simplicity. |
| 13 | * |
| 14 | *===----------------------------------------------------------------------===*/ |
| 15 | |
| 16 | #include "../assembly.h" |
| 17 | |
| 18 | #define ESTABLISH_FRAME \ |
| 19 | push {r4, r7, lr} ;\ |
| 20 | add r7, sp, #4 |
| 21 | #define CLEAR_FRAME_AND_RETURN \ |
| 22 | pop {r4, r7, pc} |
| 23 | |
| 24 | #define a r0 |
| 25 | #define b r1 |
| 26 | #define i r3 |
| 27 | #define r r4 |
| 28 | #define q ip |
| 29 | #define one lr |
| 30 | |
| 31 | .syntax unified |
| 32 | .align 3 |
| 33 | DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) |
| 34 | // We use a simple digit by digit algorithm; before we get into the actual |
| 35 | // divide loop, we must calculate the left-shift amount necessary to align |
| 36 | // the MSB of the divisor with that of the dividend (If this shift is |
| 37 | // negative, then the result is zero, and we early out). We also conjure a |
| 38 | // bit mask of 1 to use in constructing the quotient, and initialize the |
| 39 | // quotient to zero. |
| 40 | ESTABLISH_FRAME |
| 41 | clz r4, a |
| 42 | tst b, b // detect divide-by-zero |
| 43 | clz r3, b |
| 44 | mov q, #0 |
Anton Korobeynikov | bdadd87 | 2011-04-19 17:50:42 +0000 | [diff] [blame] | 45 | beq LOCAL_LABEL(return) // return 0 if b is zero. |
Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 46 | mov one, #1 |
| 47 | subs i, r3, r4 |
Anton Korobeynikov | bdadd87 | 2011-04-19 17:50:42 +0000 | [diff] [blame] | 48 | blt LOCAL_LABEL(return) // return 0 if MSB(a) < MSB(b) |
Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 49 | |
Anton Korobeynikov | bdadd87 | 2011-04-19 17:50:42 +0000 | [diff] [blame] | 50 | LOCAL_LABEL(mainLoop): |
Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 51 | // This loop basically implements the following: |
| 52 | // |
| 53 | // do { |
| 54 | // if (a >= b << i) { |
| 55 | // a -= b << i; |
| 56 | // q |= 1 << i; |
| 57 | // if (a == 0) break; |
| 58 | // } |
| 59 | // } while (--i) |
| 60 | // |
| 61 | // Note that this does not perform the final iteration (i == 0); by doing it |
| 62 | // this way, we can merge the two branches which is a substantial win for |
| 63 | // such a tight loop on current ARM architectures. |
| 64 | subs r, a, b, lsl i |
| 65 | orrhs q, q,one, lsl i |
| 66 | movhs a, r |
| 67 | subsne i, i, #1 |
Anton Korobeynikov | bdadd87 | 2011-04-19 17:50:42 +0000 | [diff] [blame] | 68 | bhi LOCAL_LABEL(mainLoop) |
Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 69 | |
| 70 | // Do the final test subtraction and update of quotient (i == 0), as it is |
| 71 | // not performed in the main loop. |
| 72 | subs r, a, b |
| 73 | orrhs q, #1 |
| 74 | movhs a, r |
| 75 | |
Anton Korobeynikov | bdadd87 | 2011-04-19 17:50:42 +0000 | [diff] [blame] | 76 | LOCAL_LABEL(return): |
Stephen Canon | 5abb5c1 | 2011-03-18 16:35:02 +0000 | [diff] [blame] | 77 | // Store the remainder, and move the quotient to r0, then return. |
| 78 | str a, [r2] |
| 79 | mov r0, q |
| 80 | CLEAR_FRAME_AND_RETURN |