blob: 614812c69e27a0a199d80e4a01d0665d64826773 [file] [log] [blame]
Stefan Krah1919b7e2012-03-21 18:25:23 +01001/*
2 * Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28
29#ifndef TYPEARITH_H
30#define TYPEARITH_H
31
32
33#include "mpdecimal.h"
34
35
36/*****************************************************************************/
37/* Low level native arithmetic on basic types */
38/*****************************************************************************/
39
40
41/** ------------------------------------------------------------
42 ** Double width multiplication and division
43 ** ------------------------------------------------------------
44 */
45
46#if defined(CONFIG_64)
47#if defined(ANSI)
48#if defined(HAVE_UINT128_T)
49static inline void
50_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
51{
52 __uint128_t hl;
53
54 hl = (__uint128_t)a * b;
55
56 *hi = hl >> 64;
57 *lo = (mpd_uint_t)hl;
58}
59
60static inline void
61_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
62 mpd_uint_t d)
63{
64 __uint128_t hl;
65
66 hl = ((__uint128_t)hi<<64) + lo;
67 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
68 *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d);
69}
70#else
71static inline void
72_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
73{
74 uint32_t w[4], carry;
75 uint32_t ah, al, bh, bl;
76 uint64_t hl;
77
78 ah = (uint32_t)(a>>32); al = (uint32_t)a;
79 bh = (uint32_t)(b>>32); bl = (uint32_t)b;
80
81 hl = (uint64_t)al * bl;
82 w[0] = (uint32_t)hl;
83 carry = (uint32_t)(hl>>32);
84
85 hl = (uint64_t)ah * bl + carry;
86 w[1] = (uint32_t)hl;
87 w[2] = (uint32_t)(hl>>32);
88
89 hl = (uint64_t)al * bh + w[1];
90 w[1] = (uint32_t)hl;
91 carry = (uint32_t)(hl>>32);
92
93 hl = ((uint64_t)ah * bh + w[2]) + carry;
94 w[2] = (uint32_t)hl;
95 w[3] = (uint32_t)(hl>>32);
96
97 *hi = ((uint64_t)w[3]<<32) + w[2];
98 *lo = ((uint64_t)w[1]<<32) + w[0];
99}
100
101/*
102 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
103 * http://www.hackersdelight.org/permissions.htm:
104 * "You are free to use, copy, and distribute any of the code on this web
105 * site, whether modified by you or not. You need not give attribution."
106 *
107 * Slightly modified, comments are mine.
108 */
109static inline int
110nlz(uint64_t x)
111{
112 int n;
113
114 if (x == 0) return(64);
115
116 n = 0;
117 if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
118 if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
119 if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;}
120 if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
121 if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
122 if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
123
124 return n;
125}
126
127static inline void
128_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
129 mpd_uint_t v)
130{
131 const mpd_uint_t b = 4294967296;
132 mpd_uint_t un1, un0,
133 vn1, vn0,
134 q1, q0,
135 un32, un21, un10,
136 rhat, t;
137 int s;
138
139 assert(u1 < v);
140
141 s = nlz(v);
142 v = v << s;
143 vn1 = v >> 32;
144 vn0 = v & 0xFFFFFFFF;
145
146 t = (s == 0) ? 0 : u0 >> (64 - s);
147 un32 = (u1 << s) | t;
148 un10 = u0 << s;
149
150 un1 = un10 >> 32;
151 un0 = un10 & 0xFFFFFFFF;
152
153 q1 = un32 / vn1;
154 rhat = un32 - q1*vn1;
155again1:
156 if (q1 >= b || q1*vn0 > b*rhat + un1) {
157 q1 = q1 - 1;
158 rhat = rhat + vn1;
159 if (rhat < b) goto again1;
160 }
161
162 /*
163 * Before again1 we had:
164 * (1) q1*vn1 + rhat = un32
165 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
166 *
167 * The statements inside the if-clause do not change the value
168 * of the left-hand side of (2), and the loop is only exited
169 * if q1*vn0 <= rhat*b + un1, so:
170 *
171 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
172 * (4) q1*v <= un32*b + un1
173 * (5) 0 <= un32*b + un1 - q1*v
174 *
175 * By (5) we are certain that the possible add-back step from
176 * Knuth's algorithm D is never required.
177 *
178 * Since the final quotient is less than 2**64, the following
179 * must be true:
180 *
181 * (6) un32*b + un1 - q1*v <= UINT64_MAX
182 *
183 * This means that in the following line, the high words
184 * of un32*b and q1*v can be discarded without any effect
185 * on the result.
186 */
187 un21 = un32*b + un1 - q1*v;
188
189 q0 = un21 / vn1;
190 rhat = un21 - q0*vn1;
191again2:
192 if (q0 >= b || q0*vn0 > b*rhat + un0) {
193 q0 = q0 - 1;
194 rhat = rhat + vn1;
195 if (rhat < b) goto again2;
196 }
197
198 *q = q1*b + q0;
199 *r = (un21*b + un0 - q0*v) >> s;
200}
201#endif
202
203/* END ANSI */
204#elif defined(ASM)
205static inline void
206_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
207{
208 mpd_uint_t h, l;
209
Stefan Kraha0346e52012-09-30 17:31:04 +0200210 __asm__ ( "mulq %3\n\t"
211 : "=d" (h), "=a" (l)
212 : "%a" (a), "rm" (b)
213 : "cc"
Stefan Krah1919b7e2012-03-21 18:25:23 +0100214 );
215
216 *hi = h;
217 *lo = l;
218}
219
220static inline void
221_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
222 mpd_uint_t d)
223{
224 mpd_uint_t qq, rr;
225
Stefan Kraha0346e52012-09-30 17:31:04 +0200226 __asm__ ( "divq %4\n\t"
227 : "=a" (qq), "=d" (rr)
228 : "a" (lo), "d" (hi), "rm" (d)
229 : "cc"
Stefan Krah1919b7e2012-03-21 18:25:23 +0100230 );
231
232 *q = qq;
233 *r = rr;
234}
235/* END GCC ASM */
236#elif defined(MASM)
237#include <intrin.h>
238#pragma intrinsic(_umul128)
239
240static inline void
241_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
242{
243 *lo = _umul128(a, b, hi);
244}
245
246void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
247 mpd_uint_t d);
248
249/* END MASM (_MSC_VER) */
250#else
251 #error "need platform specific 128 bit multiplication and division"
252#endif
253
254#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
255static inline void
256_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
257{
258 assert(exp <= 19);
259
260 if (exp <= 9) {
261 if (exp <= 4) {
262 switch (exp) {
263 case 0: *q = v; *r = 0; break;
264 case 1: DIVMOD(q, r, v, 10UL); break;
265 case 2: DIVMOD(q, r, v, 100UL); break;
266 case 3: DIVMOD(q, r, v, 1000UL); break;
267 case 4: DIVMOD(q, r, v, 10000UL); break;
268 }
269 }
270 else {
271 switch (exp) {
272 case 5: DIVMOD(q, r, v, 100000UL); break;
273 case 6: DIVMOD(q, r, v, 1000000UL); break;
274 case 7: DIVMOD(q, r, v, 10000000UL); break;
275 case 8: DIVMOD(q, r, v, 100000000UL); break;
276 case 9: DIVMOD(q, r, v, 1000000000UL); break;
277 }
278 }
279 }
280 else {
281 if (exp <= 14) {
282 switch (exp) {
283 case 10: DIVMOD(q, r, v, 10000000000ULL); break;
284 case 11: DIVMOD(q, r, v, 100000000000ULL); break;
285 case 12: DIVMOD(q, r, v, 1000000000000ULL); break;
286 case 13: DIVMOD(q, r, v, 10000000000000ULL); break;
287 case 14: DIVMOD(q, r, v, 100000000000000ULL); break;
288 }
289 }
290 else {
291 switch (exp) {
292 case 15: DIVMOD(q, r, v, 1000000000000000ULL); break;
293 case 16: DIVMOD(q, r, v, 10000000000000000ULL); break;
294 case 17: DIVMOD(q, r, v, 100000000000000000ULL); break;
295 case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break;
296 case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */
297 }
298 }
299 }
300}
301
302/* END CONFIG_64 */
303#elif defined(CONFIG_32)
304#if defined(ANSI)
305#if !defined(LEGACY_COMPILER)
306static inline void
307_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
308{
309 mpd_uuint_t hl;
310
311 hl = (mpd_uuint_t)a * b;
312
313 *hi = hl >> 32;
314 *lo = (mpd_uint_t)hl;
315}
316
317static inline void
318_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
319 mpd_uint_t d)
320{
321 mpd_uuint_t hl;
322
323 hl = ((mpd_uuint_t)hi<<32) + lo;
324 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
325 *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
326}
327/* END ANSI + uint64_t */
328#else
329static inline void
330_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
331{
332 uint16_t w[4], carry;
333 uint16_t ah, al, bh, bl;
334 uint32_t hl;
335
336 ah = (uint16_t)(a>>16); al = (uint16_t)a;
337 bh = (uint16_t)(b>>16); bl = (uint16_t)b;
338
339 hl = (uint32_t)al * bl;
340 w[0] = (uint16_t)hl;
341 carry = (uint16_t)(hl>>16);
342
343 hl = (uint32_t)ah * bl + carry;
344 w[1] = (uint16_t)hl;
345 w[2] = (uint16_t)(hl>>16);
346
347 hl = (uint32_t)al * bh + w[1];
348 w[1] = (uint16_t)hl;
349 carry = (uint16_t)(hl>>16);
350
351 hl = ((uint32_t)ah * bh + w[2]) + carry;
352 w[2] = (uint16_t)hl;
353 w[3] = (uint16_t)(hl>>16);
354
355 *hi = ((uint32_t)w[3]<<16) + w[2];
356 *lo = ((uint32_t)w[1]<<16) + w[0];
357}
358
359/*
360 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
361 * http://www.hackersdelight.org/permissions.htm:
362 * "You are free to use, copy, and distribute any of the code on this web
363 * site, whether modified by you or not. You need not give attribution."
364 *
365 * Slightly modified, comments are mine.
366 */
367static inline int
368nlz(uint32_t x)
369{
370 int n;
371
372 if (x == 0) return(32);
373
374 n = 0;
375 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
376 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
377 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
378 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
379 if (x <= 0x7FFFFFFF) {n = n + 1;}
380
381 return n;
382}
383
384static inline void
385_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
386 mpd_uint_t v)
387{
388 const mpd_uint_t b = 65536;
389 mpd_uint_t un1, un0,
390 vn1, vn0,
391 q1, q0,
392 un32, un21, un10,
393 rhat, t;
394 int s;
395
396 assert(u1 < v);
397
398 s = nlz(v);
399 v = v << s;
400 vn1 = v >> 16;
401 vn0 = v & 0xFFFF;
402
403 t = (s == 0) ? 0 : u0 >> (32 - s);
404 un32 = (u1 << s) | t;
405 un10 = u0 << s;
406
407 un1 = un10 >> 16;
408 un0 = un10 & 0xFFFF;
409
410 q1 = un32 / vn1;
411 rhat = un32 - q1*vn1;
412again1:
413 if (q1 >= b || q1*vn0 > b*rhat + un1) {
414 q1 = q1 - 1;
415 rhat = rhat + vn1;
416 if (rhat < b) goto again1;
417 }
418
419 /*
420 * Before again1 we had:
421 * (1) q1*vn1 + rhat = un32
422 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
423 *
424 * The statements inside the if-clause do not change the value
425 * of the left-hand side of (2), and the loop is only exited
426 * if q1*vn0 <= rhat*b + un1, so:
427 *
428 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
429 * (4) q1*v <= un32*b + un1
430 * (5) 0 <= un32*b + un1 - q1*v
431 *
432 * By (5) we are certain that the possible add-back step from
433 * Knuth's algorithm D is never required.
434 *
435 * Since the final quotient is less than 2**32, the following
436 * must be true:
437 *
438 * (6) un32*b + un1 - q1*v <= UINT32_MAX
439 *
440 * This means that in the following line, the high words
441 * of un32*b and q1*v can be discarded without any effect
442 * on the result.
443 */
444 un21 = un32*b + un1 - q1*v;
445
446 q0 = un21 / vn1;
447 rhat = un21 - q0*vn1;
448again2:
449 if (q0 >= b || q0*vn0 > b*rhat + un0) {
450 q0 = q0 - 1;
451 rhat = rhat + vn1;
452 if (rhat < b) goto again2;
453 }
454
455 *q = q1*b + q0;
456 *r = (un21*b + un0 - q0*v) >> s;
457}
458#endif /* END ANSI + LEGACY_COMPILER */
459
460/* END ANSI */
461#elif defined(ASM)
462static inline void
463_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
464{
465 mpd_uint_t h, l;
466
Stefan Kraha0346e52012-09-30 17:31:04 +0200467 __asm__ ( "mull %3\n\t"
468 : "=d" (h), "=a" (l)
469 : "%a" (a), "rm" (b)
470 : "cc"
Stefan Krah1919b7e2012-03-21 18:25:23 +0100471 );
472
473 *hi = h;
474 *lo = l;
475}
476
477static inline void
478_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
479 mpd_uint_t d)
480{
481 mpd_uint_t qq, rr;
482
Stefan Kraha0346e52012-09-30 17:31:04 +0200483 __asm__ ( "divl %4\n\t"
484 : "=a" (qq), "=d" (rr)
485 : "a" (lo), "d" (hi), "rm" (d)
486 : "cc"
Stefan Krah1919b7e2012-03-21 18:25:23 +0100487 );
488
489 *q = qq;
490 *r = rr;
491}
492/* END GCC ASM */
493#elif defined(MASM)
494static inline void __cdecl
495_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
496{
497 mpd_uint_t h, l;
498
499 __asm {
500 mov eax, a
501 mul b
502 mov h, edx
503 mov l, eax
504 }
505
506 *hi = h;
507 *lo = l;
508}
509
510static inline void __cdecl
511_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
512 mpd_uint_t d)
513{
514 mpd_uint_t qq, rr;
515
516 __asm {
517 mov eax, lo
518 mov edx, hi
519 div d
520 mov qq, eax
521 mov rr, edx
522 }
523
524 *q = qq;
525 *r = rr;
526}
527/* END MASM (_MSC_VER) */
528#else
529 #error "need platform specific 64 bit multiplication and division"
530#endif
531
532#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
533static inline void
534_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
535{
536 assert(exp <= 9);
537
538 if (exp <= 4) {
539 switch (exp) {
540 case 0: *q = v; *r = 0; break;
541 case 1: DIVMOD(q, r, v, 10UL); break;
542 case 2: DIVMOD(q, r, v, 100UL); break;
543 case 3: DIVMOD(q, r, v, 1000UL); break;
544 case 4: DIVMOD(q, r, v, 10000UL); break;
545 }
546 }
547 else {
548 switch (exp) {
549 case 5: DIVMOD(q, r, v, 100000UL); break;
550 case 6: DIVMOD(q, r, v, 1000000UL); break;
551 case 7: DIVMOD(q, r, v, 10000000UL); break;
552 case 8: DIVMOD(q, r, v, 100000000UL); break;
553 case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */
554 }
555 }
556}
557/* END CONFIG_32 */
558
559/* NO CONFIG */
560#else
561 #error "define CONFIG_64 or CONFIG_32"
562#endif /* CONFIG */
563
564
565static inline void
566_mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
567{
568 *q = v / d;
569 *r = v - *q * d;
570}
571
572static inline void
573_mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
574{
575 *q = v / d;
576 *r = v - *q * d;
577}
578
579
580/** ------------------------------------------------------------
581 ** Arithmetic with overflow checking
582 ** ------------------------------------------------------------
583 */
584
585/* The following macros do call exit() in case of an overflow.
586 If the library is used correctly (i.e. with valid context
587 parameters), such overflows cannot occur. The macros are used
588 as sanity checks in a couple of strategic places and should
589 be viewed as a handwritten version of gcc's -ftrapv option. */
590
591static inline mpd_size_t
592add_size_t(mpd_size_t a, mpd_size_t b)
593{
594 if (a > MPD_SIZE_MAX - b) {
595 mpd_err_fatal("add_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
596 }
597 return a + b;
598}
599
600static inline mpd_size_t
601sub_size_t(mpd_size_t a, mpd_size_t b)
602{
603 if (b > a) {
604 mpd_err_fatal("sub_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
605 }
606 return a - b;
607}
608
609#if MPD_SIZE_MAX != MPD_UINT_MAX
610 #error "adapt mul_size_t() and mulmod_size_t()"
611#endif
612
613static inline mpd_size_t
614mul_size_t(mpd_size_t a, mpd_size_t b)
615{
616 mpd_uint_t hi, lo;
617
618 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
619 if (hi) {
620 mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
621 }
622 return lo;
623}
624
625static inline mpd_size_t
626add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
627{
628 mpd_size_t ret;
629
630 *overflow = 0;
631 ret = a + b;
632 if (ret < a) *overflow = 1;
633 return ret;
634}
635
636static inline mpd_size_t
637mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
638{
639 mpd_uint_t lo;
640
641 _mpd_mul_words((mpd_uint_t *)overflow, &lo, (mpd_uint_t)a,
642 (mpd_uint_t)b);
643 return lo;
644}
645
646static inline mpd_ssize_t
647mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m)
648{
649 mpd_ssize_t r = a % m;
650 return (r < 0) ? r + m : r;
651}
652
653static inline mpd_size_t
654mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
655{
656 mpd_uint_t hi, lo;
657 mpd_uint_t q, r;
658
659 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
660 _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m);
661
662 return r;
663}
664
665
666#endif /* TYPEARITH_H */
667
668
669