| Daniel Dunbar | b3a6901 | 2009-06-26 16:47:03 +0000 | [diff] [blame^] | 1 | //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===// | 
 | 2 | // | 
 | 3 | //                     The LLVM Compiler Infrastructure | 
 | 4 | // | 
 | 5 | // This file is distributed under the University of Illinois Open Source | 
 | 6 | // License. See LICENSE.TXT for details. | 
 | 7 | // | 
 | 8 | //===----------------------------------------------------------------------===// | 
 | 9 | // | 
 | 10 | // This file implements __udivmodti4 for the compiler_rt library. | 
 | 11 | // | 
 | 12 | //===----------------------------------------------------------------------===// | 
 | 13 |  | 
 | 14 | #if __x86_64 | 
 | 15 |  | 
 | 16 | #include "int_lib.h" | 
 | 17 |  | 
 | 18 | // Effects: if rem != 0, *rem = a % b | 
 | 19 | // Returns: a / b | 
 | 20 |  | 
 | 21 | // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide | 
 | 22 |  | 
 | 23 | tu_int | 
 | 24 | __udivmodti4(tu_int a, tu_int b, tu_int* rem) | 
 | 25 | { | 
 | 26 |     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; | 
 | 27 |     const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; | 
 | 28 |     utwords n; | 
 | 29 |     n.all = a; | 
 | 30 |     utwords d; | 
 | 31 |     d.all = b; | 
 | 32 |     utwords q; | 
 | 33 |     utwords r; | 
 | 34 |     unsigned sr; | 
 | 35 |     // special cases, X is unknown, K != 0 | 
 | 36 |     if (n.high == 0) | 
 | 37 |     { | 
 | 38 |         if (d.high == 0) | 
 | 39 |         { | 
 | 40 |             // 0 X | 
 | 41 |             // --- | 
 | 42 |             // 0 X | 
 | 43 |             if (rem) | 
 | 44 |                 *rem = n.low % d.low; | 
 | 45 |             return n.low / d.low; | 
 | 46 |         } | 
 | 47 |         // 0 X | 
 | 48 |         // --- | 
 | 49 |         // K X | 
 | 50 |         if (rem) | 
 | 51 |             *rem = n.low; | 
 | 52 |         return 0; | 
 | 53 |     } | 
 | 54 |     // n.high != 0 | 
 | 55 |     if (d.low == 0) | 
 | 56 |     { | 
 | 57 |         if (d.high == 0) | 
 | 58 |         { | 
 | 59 |             // K X | 
 | 60 |             // --- | 
 | 61 |             // 0 0 | 
 | 62 |             if (rem) | 
 | 63 |                 *rem = n.high % d.low; | 
 | 64 |             return n.high / d.low; | 
 | 65 |         } | 
 | 66 |         // d.high != 0 | 
 | 67 |         if (n.low == 0) | 
 | 68 |         { | 
 | 69 |             // K 0 | 
 | 70 |             // --- | 
 | 71 |             // K 0 | 
 | 72 |             if (rem) | 
 | 73 |             { | 
 | 74 |                 r.high = n.high % d.high; | 
 | 75 |                 r.low = 0; | 
 | 76 |                 *rem = r.all; | 
 | 77 |             } | 
 | 78 |             return n.high / d.high; | 
 | 79 |         } | 
 | 80 |         // K K | 
 | 81 |         // --- | 
 | 82 |         // K 0 | 
 | 83 |         if ((d.high & (d.high - 1)) == 0)     // if d is a power of 2 | 
 | 84 |         { | 
 | 85 |             if (rem) | 
 | 86 |             { | 
 | 87 |                 r.low = n.low; | 
 | 88 |                 r.high = n.high & (d.high - 1); | 
 | 89 |                 *rem = r.all; | 
 | 90 |             } | 
 | 91 |             return n.high >> __builtin_ctzll(d.high); | 
 | 92 |         } | 
 | 93 |         // K K | 
 | 94 |         // --- | 
 | 95 |         // K 0 | 
 | 96 |         sr = __builtin_clzll(d.high) - __builtin_clzll(n.high); | 
 | 97 |         // 0 <= sr <= n_udword_bits - 2 or sr large | 
 | 98 |         if (sr > n_udword_bits - 2) | 
 | 99 |         { | 
 | 100 |            if (rem) | 
 | 101 |                 *rem = n.all; | 
 | 102 |             return 0; | 
 | 103 |         } | 
 | 104 |         ++sr; | 
 | 105 |         // 1 <= sr <= n_udword_bits - 1 | 
 | 106 |         // q.all = n.all << (n_utword_bits - sr); | 
 | 107 |         q.low = 0; | 
 | 108 |         q.high = n.low << (n_udword_bits - sr); | 
 | 109 |         // r.all = n.all >> sr; | 
 | 110 |         r.high = n.high >> sr; | 
 | 111 |         r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); | 
 | 112 |     } | 
 | 113 |     else  // d.low != 0 | 
 | 114 |     { | 
 | 115 |         if (d.high == 0) | 
 | 116 |         { | 
 | 117 |             // K X | 
 | 118 |             // --- | 
 | 119 |             // 0 K | 
 | 120 |             if ((d.low & (d.low - 1)) == 0)     // if d is a power of 2 | 
 | 121 |             { | 
 | 122 |                 if (rem) | 
 | 123 |                     *rem = n.low & (d.low - 1); | 
 | 124 |                 if (d.low == 1) | 
 | 125 |                     return n.all; | 
 | 126 |                 unsigned sr = __builtin_ctzll(d.low); | 
 | 127 |                 q.high = n.high >> sr; | 
 | 128 |                 q.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); | 
 | 129 |                 return q.all; | 
 | 130 |             } | 
 | 131 |             // K X | 
 | 132 |             // --- | 
 | 133 |             // 0 K | 
 | 134 |             sr = 1 + n_udword_bits + __builtin_clzll(d.low) | 
 | 135 |                                    - __builtin_clzll(n.high); | 
 | 136 |             // 2 <= sr <= n_utword_bits - 1 | 
 | 137 |             // q.all = n.all << (n_utword_bits - sr); | 
 | 138 |             // r.all = n.all >> sr; | 
 | 139 |             // if (sr == n_udword_bits) | 
 | 140 |             // { | 
 | 141 |             //     q.low = 0; | 
 | 142 |             //     q.high = n.low; | 
 | 143 |             //     r.high = 0; | 
 | 144 |             //     r.low = n.high; | 
 | 145 |             // } | 
 | 146 |             // else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1 | 
 | 147 |             // { | 
 | 148 |             //     q.low = 0; | 
 | 149 |             //     q.high = n.low << (n_udword_bits - sr); | 
 | 150 |             //     r.high = n.high >> sr; | 
 | 151 |             //     r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); | 
 | 152 |             // } | 
 | 153 |             // else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1 | 
 | 154 |             // { | 
 | 155 |             //     q.low = n.low << (n_utword_bits - sr); | 
 | 156 |             //     q.high = (n.high << (n_utword_bits - sr)) | | 
 | 157 |             //              (n.low >> (sr - n_udword_bits)); | 
 | 158 |             //     r.high = 0; | 
 | 159 |             //     r.low = n.high >> (sr - n_udword_bits); | 
 | 160 |             // } | 
 | 161 |             q.low =  (n.low << (n_utword_bits - sr)) & | 
 | 162 |                      ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)); | 
 | 163 |             q.high = ((n.low << ( n_udword_bits - sr))                        & | 
 | 164 |                      ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) | | 
 | 165 |                      (((n.high << (n_utword_bits - sr))                       | | 
 | 166 |                      (n.low >> (sr - n_udword_bits)))                         & | 
 | 167 |                      ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1))); | 
 | 168 |             r.high = (n.high >> sr) & | 
 | 169 |                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); | 
 | 170 |             r.low =  ((n.high >> (sr - n_udword_bits))                        & | 
 | 171 |                      ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) | | 
 | 172 |                      (((n.high << (n_udword_bits - sr))                       | | 
 | 173 |                      (n.low >> sr))                                           & | 
 | 174 |                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); | 
 | 175 |         } | 
 | 176 |         else | 
 | 177 |         { | 
 | 178 |             // K X | 
 | 179 |             // --- | 
 | 180 |             // K K | 
 | 181 |             sr = __builtin_clzll(d.high) - __builtin_clzll(n.high); | 
 | 182 |             // 0 <= sr <= n_udword_bits - 1 or sr large | 
 | 183 |             if (sr > n_udword_bits - 1) | 
 | 184 |             { | 
 | 185 |                if (rem) | 
 | 186 |                     *rem = n.all; | 
 | 187 |                 return 0; | 
 | 188 |             } | 
 | 189 |             ++sr; | 
 | 190 |             // 1 <= sr <= n_udword_bits | 
 | 191 |             // q.all = n.all << (n_utword_bits - sr); | 
 | 192 |             q.low = 0; | 
 | 193 |             q.high = n.low << (n_udword_bits - sr); | 
 | 194 |             // r.all = n.all >> sr; | 
 | 195 |             // if (sr < n_udword_bits) | 
 | 196 |             // { | 
 | 197 |             //     r.high = n.high >> sr; | 
 | 198 |             //     r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); | 
 | 199 |             // } | 
 | 200 |             // else | 
 | 201 |             // { | 
 | 202 |             //     r.high = 0; | 
 | 203 |             //     r.low = n.high; | 
 | 204 |             // } | 
 | 205 |             r.high = (n.high >> sr) & | 
 | 206 |                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); | 
 | 207 |             r.low = (n.high << (n_udword_bits - sr)) | | 
 | 208 |                     ((n.low >> sr)                   & | 
 | 209 |                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); | 
 | 210 |         } | 
 | 211 |     } | 
 | 212 |     // Not a special case | 
 | 213 |     // q and r are initialized with: | 
 | 214 |     // q.all = n.all << (n_utword_bits - sr); | 
 | 215 |     // r.all = n.all >> sr; | 
 | 216 |     // 1 <= sr <= n_utword_bits - 1 | 
 | 217 |     su_int carry = 0; | 
 | 218 |     for (; sr > 0; --sr) | 
 | 219 |     { | 
 | 220 |         // r:q = ((r:q)  << 1) | carry | 
 | 221 |         r.high = (r.high << 1) | (r.low  >> (n_udword_bits - 1)); | 
 | 222 |         r.low  = (r.low  << 1) | (q.high >> (n_udword_bits - 1)); | 
 | 223 |         q.high = (q.high << 1) | (q.low  >> (n_udword_bits - 1)); | 
 | 224 |         q.low  = (q.low  << 1) | carry; | 
 | 225 |         // carry = 0; | 
 | 226 |         // if (r.all >= d.all) | 
 | 227 |         // { | 
 | 228 |         //      r.all -= d.all; | 
 | 229 |         //      carry = 1; | 
 | 230 |         // } | 
 | 231 |         const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); | 
 | 232 |         carry = s & 1; | 
 | 233 |         r.all -= d.all & s; | 
 | 234 |     } | 
 | 235 |     q.all = (q.all << 1) | carry; | 
 | 236 |     if (rem) | 
 | 237 |         *rem = r.all; | 
 | 238 |     return q.all; | 
 | 239 | } | 
 | 240 |  | 
 | 241 | #endif |