Blame - libm/upstream-freebsd/lib/msun/src/s_fmal.c - fp2-dev/platform/bionic

blob: 92719010edd17744133d7821d494aaf602ec0762 [file] [log] [blame]

Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	1	/*-
				2	* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
				3	* All rights reserved.
				4	*
				5	* Redistribution and use in source and binary forms, with or without
				6	* modification, are permitted provided that the following conditions
				7	* are met:
				8	* 1. Redistributions of source code must retain the above copyright
				9	* notice, this list of conditions and the following disclaimer.
				10	* 2. Redistributions in binary form must reproduce the above copyright
				11	* notice, this list of conditions and the following disclaimer in the
				12	* documentation and/or other materials provided with the distribution.
				13	*
				14	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
				15	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				17	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
				18	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				19	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				20	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				21	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				22	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				23	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				24	* SUCH DAMAGE.
				25	*/
				26
				27	#include <sys/cdefs.h>
				28	__FBSDID("$FreeBSD$");
				29
				30	#include <fenv.h>
				31	#include <float.h>
				32	#include <math.h>
				33
				34	#include "fpmath.h"
				35
				36	/*
				37	* A struct dd represents a floating-point number with twice the precision
				38	* of a long double. We maintain the invariant that "hi" stores the high-order
				39	* bits of the result.
				40	*/
				41	struct dd {
				42	long double hi;
				43	long double lo;
				44	};
				45
				46	/*
				47	* Compute a+b exactly, returning the exact result in a struct dd. We assume
				48	* that both a and b are finite, but make no assumptions about their relative
				49	* magnitudes.
				50	*/
				51	static inline struct dd
				52	dd_add(long double a, long double b)
				53	{
				54	struct dd ret;
				55	long double s;
				56
				57	ret.hi = a + b;
				58	s = ret.hi - a;
				59	ret.lo = (a - (ret.hi - s)) + (b - s);
				60	return (ret);
				61	}
				62
				63	/*
				64	* Compute a+b, with a small tweak: The least significant bit of the
				65	* result is adjusted into a sticky bit summarizing all the bits that
				66	* were lost to rounding. This adjustment negates the effects of double
				67	* rounding when the result is added to another number with a higher
				68	* exponent. For an explanation of round and sticky bits, see any reference
				69	* on FPU design, e.g.,
				70	*
				71	* J. Coonen. An Implementation Guide to a Proposed Standard for
				72	* Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
				73	*/
				74	static inline long double
				75	add_adjusted(long double a, long double b)
				76	{
				77	struct dd sum;
				78	union IEEEl2bits u;
				79
				80	sum = dd_add(a, b);
				81	if (sum.lo != 0) {
				82	u.e = sum.hi;
				83	if ((u.bits.manl & 1) == 0)
				84	sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
				85	}
				86	return (sum.hi);
				87	}
				88
				89	/*
				90	* Compute ldexp(a+b, scale) with a single rounding error. It is assumed
				91	* that the result will be subnormal, and care is taken to ensure that
				92	* double rounding does not occur.
				93	*/
				94	static inline long double
				95	add_and_denormalize(long double a, long double b, int scale)
				96	{
				97	struct dd sum;
				98	int bits_lost;
				99	union IEEEl2bits u;
				100
				101	sum = dd_add(a, b);
				102
				103	/*
				104	* If we are losing at least two bits of accuracy to denormalization,
				105	* then the first lost bit becomes a round bit, and we adjust the
				106	* lowest bit of sum.hi to make it a sticky bit summarizing all the
				107	* bits in sum.lo. With the sticky bit adjusted, the hardware will
				108	* break any ties in the correct direction.
				109	*
				110	* If we are losing only one bit to denormalization, however, we must
				111	* break the ties manually.
				112	*/
				113	if (sum.lo != 0) {
				114	u.e = sum.hi;
				115	bits_lost = -u.bits.exp - scale + 1;
				116	if (bits_lost != 1 ^ (int)(u.bits.manl & 1))
				117	sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
				118	}
				119	return (ldexp(sum.hi, scale));
				120	}
				121
				122	/*
				123	* Compute a*b exactly, returning the exact result in a struct dd. We assume
				124	* that both a and b are normalized, so no underflow or overflow will occur.
				125	* The current rounding mode must be round-to-nearest.
				126	*/
				127	static inline struct dd
				128	dd_mul(long double a, long double b)
				129	{
				130	#if LDBL_MANT_DIG == 64
				131	static const long double split = 0x1p32L + 1.0;
				132	#elif LDBL_MANT_DIG == 113
				133	static const long double split = 0x1p57L + 1.0;
				134	#endif
				135	struct dd ret;
				136	long double ha, hb, la, lb, p, q;
				137
				138	p = a * split;
				139	ha = a - p;
				140	ha += p;
				141	la = a - ha;
				142
				143	p = b * split;
				144	hb = b - p;
				145	hb += p;
				146	lb = b - hb;
				147
				148	p = ha * hb;
				149	q = ha * lb + la * hb;
				150
				151	ret.hi = p + q;
				152	ret.lo = p - ret.hi + q + la * lb;
				153	return (ret);
				154	}
				155
				156	/*
				157	* Fused multiply-add: Compute x * y + z with a single rounding error.
				158	*
				159	* We use scaling to avoid overflow/underflow, along with the
				160	* canonical precision-doubling technique adapted from:
				161	*
				162	* Dekker, T. A Floating-Point Technique for Extending the
				163	* Available Precision. Numer. Math. 18, 224-242 (1971).
				164	*/
				165	long double
				166	fmal(long double x, long double y, long double z)
				167	{
				168	long double xs, ys, zs, adj;
				169	struct dd xy, r;
				170	int oround;
				171	int ex, ey, ez;
				172	int spread;
				173
				174	/*
				175	* Handle special cases. The order of operations and the particular
				176	* return values here are crucial in handling special cases involving
				177	* infinities, NaNs, overflows, and signed zeroes correctly.
				178	*/
				179	if (x == 0.0 \|\| y == 0.0)
				180	return (x * y + z);
				181	if (z == 0.0)
				182	return (x * y);
				183	if (!isfinite(x) \|\| !isfinite(y))
				184	return (x * y + z);
				185	if (!isfinite(z))
				186	return (z);
				187
				188	xs = frexpl(x, &ex);
				189	ys = frexpl(y, &ey);
				190	zs = frexpl(z, &ez);
				191	oround = fegetround();
				192	spread = ex + ey - ez;
				193
				194	/*
				195	* If x * y and z are many orders of magnitude apart, the scaling
				196	* will overflow, so we handle these cases specially. Rounding
				197	* modes other than FE_TONEAREST are painful.
				198	*/
				199	if (spread < -LDBL_MANT_DIG) {
				200	feraiseexcept(FE_INEXACT);
				201	if (!isnormal(z))
				202	feraiseexcept(FE_UNDERFLOW);
				203	switch (oround) {
				204	case FE_TONEAREST:
				205	return (z);
				206	case FE_TOWARDZERO:
				207	if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				208	return (z);
				209	else
				210	return (nextafterl(z, 0));
				211	case FE_DOWNWARD:
				212	if (x > 0.0 ^ y < 0.0)
				213	return (z);
				214	else
				215	return (nextafterl(z, -INFINITY));
				216	default: /* FE_UPWARD */
				217	if (x > 0.0 ^ y < 0.0)
				218	return (nextafterl(z, INFINITY));
				219	else
				220	return (z);
				221	}
				222	}
				223	if (spread <= LDBL_MANT_DIG * 2)
				224	zs = ldexpl(zs, -spread);
				225	else
				226	zs = copysignl(LDBL_MIN, zs);
				227
				228	fesetround(FE_TONEAREST);
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	229	/* work around clang bug 8100 */
				230	volatile long double vxs = xs;
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	231
				232	/*
				233	* Basic approach for round-to-nearest:
				234	*
				235	* (xy.hi, xy.lo) = x * y (exact)
				236	* (r.hi, r.lo) = xy.hi + z (exact)
				237	* adj = xy.lo + r.lo (inexact; low bit is sticky)
				238	* result = r.hi + adj (correctly rounded)
				239	*/
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	240	xy = dd_mul(vxs, ys);
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	241	r = dd_add(xy.hi, zs);
				242
				243	spread = ex + ey;
				244
				245	if (r.hi == 0.0) {
				246	/*
				247	* When the addends cancel to 0, ensure that the result has
				248	* the correct sign.
				249	*/
				250	fesetround(oround);
				251	volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
				252	return (xy.hi + vzs + ldexpl(xy.lo, spread));
				253	}
				254
				255	if (oround != FE_TONEAREST) {
				256	/*
				257	* There is no need to worry about double rounding in directed
				258	* rounding modes.
				259	*/
				260	fesetround(oround);
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	261	/* work around clang bug 8100 */
				262	volatile long double vrlo = r.lo;
				263	adj = vrlo + xy.lo;
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	264	return (ldexpl(r.hi + adj, spread));
				265	}
				266
				267	adj = add_adjusted(r.lo, xy.lo);
				268	if (spread + ilogbl(r.hi) > -16383)
				269	return (ldexpl(r.hi + adj, spread));
				270	else
				271	return (add_and_denormalize(r.hi, adj, spread));
				272	}