blob: ad4d2aa2216fb6e97944dda453d20c67e9680eaf [file] [log] [blame]
Daniel Dunbarfd089992009-06-26 16:47:03 +00001//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements __udivmoddi4 for the compiler_rt library.
11//
12//===----------------------------------------------------------------------===//
13
14#include "int_lib.h"
15
16// Effects: if rem != 0, *rem = a % b
17// Returns: a / b
18
19// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
20
21du_int
22__udivmoddi4(du_int a, du_int b, du_int* rem)
23{
24 const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
25 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
26 udwords n;
27 n.all = a;
28 udwords d;
29 d.all = b;
30 udwords q;
31 udwords r;
32 unsigned sr;
33 // special cases, X is unknown, K != 0
34 if (n.high == 0)
35 {
36 if (d.high == 0)
37 {
38 // 0 X
39 // ---
40 // 0 X
41 if (rem)
42 *rem = n.low % d.low;
43 return n.low / d.low;
44 }
45 // 0 X
46 // ---
47 // K X
48 if (rem)
49 *rem = n.low;
50 return 0;
51 }
52 // n.high != 0
53 if (d.low == 0)
54 {
55 if (d.high == 0)
56 {
57 // K X
58 // ---
59 // 0 0
60 if (rem)
61 *rem = n.high % d.low;
62 return n.high / d.low;
63 }
64 // d.high != 0
65 if (n.low == 0)
66 {
67 // K 0
68 // ---
69 // K 0
70 if (rem)
71 {
72 r.high = n.high % d.high;
73 r.low = 0;
74 *rem = r.all;
75 }
76 return n.high / d.high;
77 }
78 // K K
79 // ---
80 // K 0
81 if ((d.high & (d.high - 1)) == 0) // if d is a power of 2
82 {
83 if (rem)
84 {
85 r.low = n.low;
86 r.high = n.high & (d.high - 1);
87 *rem = r.all;
88 }
89 return n.high >> __builtin_ctz(d.high);
90 }
91 // K K
92 // ---
93 // K 0
94 sr = __builtin_clz(d.high) - __builtin_clz(n.high);
95 // 0 <= sr <= n_uword_bits - 2 or sr large
96 if (sr > n_uword_bits - 2)
97 {
98 if (rem)
99 *rem = n.all;
100 return 0;
101 }
102 ++sr;
103 // 1 <= sr <= n_uword_bits - 1
104 // q.all = n.all << (n_udword_bits - sr);
105 q.low = 0;
106 q.high = n.low << (n_uword_bits - sr);
107 // r.all = n.all >> sr;
108 r.high = n.high >> sr;
109 r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr);
110 }
111 else // d.low != 0
112 {
113 if (d.high == 0)
114 {
115 // K X
116 // ---
117 // 0 K
118 if ((d.low & (d.low - 1)) == 0) // if d is a power of 2
119 {
120 if (rem)
121 *rem = n.low & (d.low - 1);
122 if (d.low == 1)
123 return n.all;
124 unsigned sr = __builtin_ctz(d.low);
125 q.high = n.high >> sr;
126 q.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr);
127 return q.all;
128 }
129 // K X
130 // ---
131 // 0 K
132 sr = 1 + n_uword_bits + __builtin_clz(d.low) - __builtin_clz(n.high);
133 // 2 <= sr <= n_udword_bits - 1
134 // q.all = n.all << (n_udword_bits - sr);
135 // r.all = n.all >> sr;
136 // if (sr == n_uword_bits)
137 // {
138 // q.low = 0;
139 // q.high = n.low;
140 // r.high = 0;
141 // r.low = n.high;
142 // }
143 // else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
144 // {
145 // q.low = 0;
146 // q.high = n.low << (n_uword_bits - sr);
147 // r.high = n.high >> sr;
148 // r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr);
149 // }
150 // else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
151 // {
152 // q.low = n.low << (n_udword_bits - sr);
153 // q.high = (n.high << (n_udword_bits - sr)) |
154 // (n.low >> (sr - n_uword_bits));
155 // r.high = 0;
156 // r.low = n.high >> (sr - n_uword_bits);
157 // }
158 q.low = (n.low << (n_udword_bits - sr)) &
159 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1));
160 q.high = ((n.low << ( n_uword_bits - sr)) &
161 ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) |
162 (((n.high << (n_udword_bits - sr)) |
163 (n.low >> (sr - n_uword_bits))) &
164 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)));
165 r.high = (n.high >> sr) &
166 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
167 r.low = ((n.high >> (sr - n_uword_bits)) &
168 ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) |
169 (((n.high << (n_uword_bits - sr)) |
170 (n.low >> sr)) &
171 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
172 }
173 else
174 {
175 // K X
176 // ---
177 // K K
178 sr = __builtin_clz(d.high) - __builtin_clz(n.high);
179 // 0 <= sr <= n_uword_bits - 1 or sr large
180 if (sr > n_uword_bits - 1)
181 {
182 if (rem)
183 *rem = n.all;
184 return 0;
185 }
186 ++sr;
187 // 1 <= sr <= n_uword_bits
188 // q.all = n.all << (n_udword_bits - sr);
189 q.low = 0;
190 q.high = n.low << (n_uword_bits - sr);
191 // r.all = n.all >> sr;
192 // if (sr < n_uword_bits)
193 // {
194 // r.high = n.high >> sr;
195 // r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr);
196 // }
197 // else
198 // {
199 // r.high = 0;
200 // r.low = n.high;
201 // }
202 r.high = (n.high >> sr) &
203 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
204 r.low = (n.high << (n_uword_bits - sr)) |
205 ((n.low >> sr) &
206 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
207 }
208 }
209 // Not a special case
210 // q and r are initialized with:
211 // q.all = n.all << (n_udword_bits - sr);
212 // r.all = n.all >> sr;
213 // 1 <= sr <= n_udword_bits - 1
214 su_int carry = 0;
215 for (; sr > 0; --sr)
216 {
217 // r:q = ((r:q) << 1) | carry
218 r.high = (r.high << 1) | (r.low >> (n_uword_bits - 1));
219 r.low = (r.low << 1) | (q.high >> (n_uword_bits - 1));
220 q.high = (q.high << 1) | (q.low >> (n_uword_bits - 1));
221 q.low = (q.low << 1) | carry;
222 // carry = 0;
223 // if (r.all >= d.all)
224 // {
225 // r.all -= d.all;
226 // carry = 1;
227 // }
228 const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
229 carry = s & 1;
230 r.all -= d.all & s;
231 }
232 q.all = (q.all << 1) | carry;
233 if (rem)
234 *rem = r.all;
235 return q.all;
236}