blob: 79cf6f2fc67eb38156603917eb6fb614288dcee8 [file] [log] [blame]
Daniel Dunbarb3a69012009-06-26 16:47:03 +00001//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements __udivmodti4 for the compiler_rt library.
11//
12//===----------------------------------------------------------------------===//
13
14#if __x86_64
15
16#include "int_lib.h"
17
18// Effects: if rem != 0, *rem = a % b
19// Returns: a / b
20
21// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
22
23tu_int
24__udivmodti4(tu_int a, tu_int b, tu_int* rem)
25{
26 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
27 const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
28 utwords n;
29 n.all = a;
30 utwords d;
31 d.all = b;
32 utwords q;
33 utwords r;
34 unsigned sr;
35 // special cases, X is unknown, K != 0
36 if (n.high == 0)
37 {
38 if (d.high == 0)
39 {
40 // 0 X
41 // ---
42 // 0 X
43 if (rem)
44 *rem = n.low % d.low;
45 return n.low / d.low;
46 }
47 // 0 X
48 // ---
49 // K X
50 if (rem)
51 *rem = n.low;
52 return 0;
53 }
54 // n.high != 0
55 if (d.low == 0)
56 {
57 if (d.high == 0)
58 {
59 // K X
60 // ---
61 // 0 0
62 if (rem)
63 *rem = n.high % d.low;
64 return n.high / d.low;
65 }
66 // d.high != 0
67 if (n.low == 0)
68 {
69 // K 0
70 // ---
71 // K 0
72 if (rem)
73 {
74 r.high = n.high % d.high;
75 r.low = 0;
76 *rem = r.all;
77 }
78 return n.high / d.high;
79 }
80 // K K
81 // ---
82 // K 0
83 if ((d.high & (d.high - 1)) == 0) // if d is a power of 2
84 {
85 if (rem)
86 {
87 r.low = n.low;
88 r.high = n.high & (d.high - 1);
89 *rem = r.all;
90 }
91 return n.high >> __builtin_ctzll(d.high);
92 }
93 // K K
94 // ---
95 // K 0
96 sr = __builtin_clzll(d.high) - __builtin_clzll(n.high);
97 // 0 <= sr <= n_udword_bits - 2 or sr large
98 if (sr > n_udword_bits - 2)
99 {
100 if (rem)
101 *rem = n.all;
102 return 0;
103 }
104 ++sr;
105 // 1 <= sr <= n_udword_bits - 1
106 // q.all = n.all << (n_utword_bits - sr);
107 q.low = 0;
108 q.high = n.low << (n_udword_bits - sr);
109 // r.all = n.all >> sr;
110 r.high = n.high >> sr;
111 r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr);
112 }
113 else // d.low != 0
114 {
115 if (d.high == 0)
116 {
117 // K X
118 // ---
119 // 0 K
120 if ((d.low & (d.low - 1)) == 0) // if d is a power of 2
121 {
122 if (rem)
123 *rem = n.low & (d.low - 1);
124 if (d.low == 1)
125 return n.all;
126 unsigned sr = __builtin_ctzll(d.low);
127 q.high = n.high >> sr;
128 q.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr);
129 return q.all;
130 }
131 // K X
132 // ---
133 // 0 K
134 sr = 1 + n_udword_bits + __builtin_clzll(d.low)
135 - __builtin_clzll(n.high);
136 // 2 <= sr <= n_utword_bits - 1
137 // q.all = n.all << (n_utword_bits - sr);
138 // r.all = n.all >> sr;
139 // if (sr == n_udword_bits)
140 // {
141 // q.low = 0;
142 // q.high = n.low;
143 // r.high = 0;
144 // r.low = n.high;
145 // }
146 // else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
147 // {
148 // q.low = 0;
149 // q.high = n.low << (n_udword_bits - sr);
150 // r.high = n.high >> sr;
151 // r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr);
152 // }
153 // else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
154 // {
155 // q.low = n.low << (n_utword_bits - sr);
156 // q.high = (n.high << (n_utword_bits - sr)) |
157 // (n.low >> (sr - n_udword_bits));
158 // r.high = 0;
159 // r.low = n.high >> (sr - n_udword_bits);
160 // }
161 q.low = (n.low << (n_utword_bits - sr)) &
162 ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
163 q.high = ((n.low << ( n_udword_bits - sr)) &
164 ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
165 (((n.high << (n_utword_bits - sr)) |
166 (n.low >> (sr - n_udword_bits))) &
167 ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
168 r.high = (n.high >> sr) &
169 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
170 r.low = ((n.high >> (sr - n_udword_bits)) &
171 ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
172 (((n.high << (n_udword_bits - sr)) |
173 (n.low >> sr)) &
174 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
175 }
176 else
177 {
178 // K X
179 // ---
180 // K K
181 sr = __builtin_clzll(d.high) - __builtin_clzll(n.high);
182 // 0 <= sr <= n_udword_bits - 1 or sr large
183 if (sr > n_udword_bits - 1)
184 {
185 if (rem)
186 *rem = n.all;
187 return 0;
188 }
189 ++sr;
190 // 1 <= sr <= n_udword_bits
191 // q.all = n.all << (n_utword_bits - sr);
192 q.low = 0;
193 q.high = n.low << (n_udword_bits - sr);
194 // r.all = n.all >> sr;
195 // if (sr < n_udword_bits)
196 // {
197 // r.high = n.high >> sr;
198 // r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr);
199 // }
200 // else
201 // {
202 // r.high = 0;
203 // r.low = n.high;
204 // }
205 r.high = (n.high >> sr) &
206 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
207 r.low = (n.high << (n_udword_bits - sr)) |
208 ((n.low >> sr) &
209 ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
210 }
211 }
212 // Not a special case
213 // q and r are initialized with:
214 // q.all = n.all << (n_utword_bits - sr);
215 // r.all = n.all >> sr;
216 // 1 <= sr <= n_utword_bits - 1
217 su_int carry = 0;
218 for (; sr > 0; --sr)
219 {
220 // r:q = ((r:q) << 1) | carry
221 r.high = (r.high << 1) | (r.low >> (n_udword_bits - 1));
222 r.low = (r.low << 1) | (q.high >> (n_udword_bits - 1));
223 q.high = (q.high << 1) | (q.low >> (n_udword_bits - 1));
224 q.low = (q.low << 1) | carry;
225 // carry = 0;
226 // if (r.all >= d.all)
227 // {
228 // r.all -= d.all;
229 // carry = 1;
230 // }
231 const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
232 carry = s & 1;
233 r.all -= d.all & s;
234 }
235 q.all = (q.all << 1) | carry;
236 if (rem)
237 *rem = r.all;
238 return q.all;
239}
240
241#endif