/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM architecture. A naive digit-by-digit computation is
 * employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

/*
 * Frame helpers.
 *
 * ESTABLISH_FRAME pushes r7 and lr and then copies sp into r7.  Saving lr is
 * required here, not optional: lr is used as a scratch register below (see
 * the `one` alias), so the return address has to live on the stack.
 *
 * CLEAR_FRAME_AND_RETURN pops the saved r7 and pops the saved lr straight
 * into pc, which performs the return.
 *
 * The `;` inside ESTABLISH_FRAME separates the two instructions, because the
 * preprocessor joins the macro body into a single line.
 */
#define ESTABLISH_FRAME \
    push   {r7, lr}    ;\
    mov     r7,     sp
#define CLEAR_FRAME_AND_RETURN \
    pop    {r7, pc}

/*
 * Register roles:
 *
 *   a   (r0)  dividend on entry; running remainder inside the divide loop
 *   b   (r1)  divisor
 *   r   (r2)  trial remainder, a - (b << i)
 *   i   (r3)  current shift amount / quotient bit position
 *   q   (ip)  quotient being accumulated
 *   one (lr)  the constant 1, shifted to form each quotient bit
 *
 * These are plain preprocessor substitutions, so the bare tokens a, b, r, i,
 * q and one must not be used for anything else in instruction operands.
 */
#define a r0
#define b r1
#define r r2
#define i r3
#define q ip
#define one lr

.syntax unified
.align 3
// __udivsi3: 32-bit unsigned integer divide.
//
//   In:    a (r0) = dividend, b (r1) = divisor
//   Out:   r0     = a / b, truncated; 0 when b == 0
//   Uses:  r2, r3, ip, lr and the condition flags.  r7 and lr are saved and
//          restored by ESTABLISH_FRAME / CLEAR_FRAME_AND_RETURN.
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
    // We use a simple digit-by-digit algorithm; before we get into the actual
    // divide loop, we must calculate the left-shift amount necessary to align
    // the MSB of the divisor with that of the dividend (if this shift is
    // negative, then the result is zero, and we early out).  We also conjure a
    // bit mask of 1 to use in constructing the quotient, and initialize the
    // quotient to zero.
    //
    // The flags set by `tst` survive until `beq`: neither `clz` nor the
    // non-flag-setting `mov` in between modifies them.
    ESTABLISH_FRAME
    clz     r2,     a               // r2 = leading zeros of the dividend
    tst     b,      b               // detect divide-by-zero (Z set iff b == 0)
    clz     r3,     b               // r3 = leading zeros of the divisor
    mov     q,      #0              // quotient starts out as zero
    beq     L_return                // return 0 if b is zero.
    mov     one,    #1
    subs    i,      r3, r2          // i = clz(b) - clz(a) = MSB(a) - MSB(b)
    blt     L_return                // return 0 if MSB(a) < MSB(b)

L_mainLoop:
    // This loop basically implements the following:
    //
    //   do {
    //       if (a >= b << i) {
    //           a -= b << i;
    //           q |= 1 << i;
    //           if (a == 0) break;
    //       }
    //   } while (--i)
    //
    // Note that this does not perform the final iteration (i == 0); by doing it
    // this way, we can merge the two branches which is a substantial win for
    // such a tight loop on current ARM architectures.  (If the loop is entered
    // with i already 0 the body does run once for i == 0; the final step below
    // then finds a < b and changes nothing.)
    //
    // i never exceeds clz(b), so b << i cannot shift bits out of the register.
    //
    // Flag flow: the first `subs` sets C iff a >= (b << i) and Z iff they are
    // equal.  `subsne` runs only when Z is clear and replaces the flags with
    // those of i - 1: C set iff there was no borrow, Z set iff the new i is 0.
    // `bhi` (C set and Z clear) therefore loops only while the new i is still
    // positive, and falls through when the remainder reached exactly zero
    // (`subsne` skipped, Z still set).
    subs    r,      a,  b, lsl i    // r = a - (b << i)
    orrhs   q,      q,one, lsl i    // if a >= (b << i): q |= 1 << i
    movhs   a,      r               //                   a -= b << i
    subsne  i,      i, #1           // --i unless the remainder is now zero
    bhi     L_mainLoop

    // Do the final test subtraction and update of quotient (i == 0), as it is
    // not performed in the main loop.
    subs    r,      a,  b
    orrhs   q,      #1

L_return:
    // Move the quotient to r0 and return.
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN