Blame - math/pow.c - platform/external/arm-optimized-routines

blob: ac2d974780ad5505c1fad7e8e2bd642a3cfd5087 [file] [log] [blame]

Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	1	/*
				2	* Double-precision x^y function.
				3	*
				4	* Copyright (c) 2018, Arm Limited.
				5	* SPDX-License-Identifier: Apache-2.0
				6	*
				7	* Licensed under the Apache License, Version 2.0 (the "License");
				8	* you may not use this file except in compliance with the License.
				9	* You may obtain a copy of the License at
				10	*
				11	* http://www.apache.org/licenses/LICENSE-2.0
				12	*
				13	* Unless required by applicable law or agreed to in writing, software
				14	* distributed under the License is distributed on an "AS IS" BASIS,
				15	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				16	* See the License for the specific language governing permissions and
				17	* limitations under the License.
				18	*/
				19
				20	#include <math.h>
				21	#include <stdint.h>
				22	#include "math_config.h"
				23
				24	/*
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	25	Worst-case error: 0.57 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
				26	relerr_log: 1.3 * 2^-68 (Relative error of log, 1.4 * 2^-68 without fma)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	27	ulperr_exp: 0.509 ULP (ULP error of exp)
				28	*/
				29
				30	#define T __pow_log_data.tab
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	31	#define A __pow_log_data.poly
				32	#define Ln2hi __pow_log_data.ln2hi
				33	#define Ln2lo __pow_log_data.ln2lo
				34	#define N (1 << POW_LOG_TABLE_BITS)
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	35	#define OFF 0x3fe6955500000000
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	36
				37	static inline uint32_t
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	38	top12 (double x)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	39	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	40	return asuint64 (x) >> 52;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	41	}
				42
				43	static inline double_t
				44	log_inline (uint64_t ix, double_t *tail)
				45	{
				46	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	47	double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	48	uint64_t iz, tmp;
				49	int k, i;
				50
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	51	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
				52	The range is split into N subintervals.
				53	The ith subinterval contains z and c is near its center. */
				54	tmp = ix - OFF;
				55	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
				56	k = (int64_t) tmp >> 52; /* arithmetic shift */
				57	iz = ix - (tmp & 0xfffULL << 52);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	58	z = asdouble (iz);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	59	kd = (double_t) k;
				60
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	61	/* log(x) = kLn2 + log(c) + log1p(z/c-1). /
				62	invc = T[i].invc;
				63	logc = T[i].logc;
				64	logctail = T[i].logctail;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	65
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	66	/* r = z/c - 1, arranged to be exact. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	67	#if HAVE_FAST_FMA
				68	r = fma (z, invc, -1.0);
				69	#else
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	70	double_t zhi = asdouble (iz & (-1ULL << 32));
				71	double_t zlo = z - zhi;
				72	double_t rhi = zhi * invc - 1.0;
				73	double_t rlo = zlo * invc;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	74	r = rhi + rlo;
				75	#endif
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	76
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	77	/* kLn2 + log(c) + r. /
				78	t1 = kd * Ln2hi + logc;
				79	t2 = t1 + r;
				80	lo1 = kd * Ln2lo + logctail;
				81	lo2 = t1 - t2 + r;
				82
				83	/* Evaluation is optimized assuming superscalar pipelined execution. */
				84	double_t ar, ar2, ar3, lo3, lo4;
				85	ar = A[0] * r; /* A[0] = -0.5. */
				86	ar2 = r * ar;
				87	ar3 = r * ar2;
				88	/* kLn2 + log(c) + r + A[0]rr. /
				89	#if HAVE_FAST_FMA
				90	hi = t2 + ar2;
				91	lo3 = fma (ar, r, -ar2);
				92	lo4 = t2 - hi + ar2;
				93	#else
				94	double_t arhi = A[0] * rhi;
				95	double_t arhi2 = rhi * arhi;
				96	hi = t2 + arhi2;
				97	lo3 = rlo * (ar + arhi);
				98	lo4 = t2 - hi + arhi2;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	99	#endif
Szabolcs Nagy	a623032	2018-06-21 17:53:15 +0100	[diff] [blame]	100	/* p = log1p(r) - r - A[0]rr. */
				101	#if POW_LOG_POLY_ORDER == 8
				102	p = ar3(A[1] + rA[2] + ar2(A[3] + rA[4] + ar2(A[5] + rA[6])));
				103	#endif
				104	lo = lo1 + lo2 + lo3 + lo4 + p;
				105	y = hi + lo;
				106	*tail = hi - y + lo;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	107	return y;
				108	}
				109
				110	#undef N
				111	#undef T
				112	#define N (1 << EXP_TABLE_BITS)
				113	#define InvLn2N __exp_data.invln2N
				114	#define NegLn2hiN __exp_data.negln2hiN
				115	#define NegLn2loN __exp_data.negln2loN
				116	#define Shift __exp_data.shift
				117	#define T __exp_data.tab
				118	#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
				119	#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
				120	#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
				121	#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
				122	#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
				123
				124	static inline double
				125	specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
				126	{
				127	double_t scale, y;
				128
				129	if ((ki & 0x80000000) == 0)
				130	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	131	/* k > 0, the exponent of scale might have overflowed by <= 460. */
				132	sbits -= 1009ull << 52;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	133	scale = asdouble (sbits);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	134	y = 0x1p1009 * (scale + scale * tmp);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	135	return check_oflow (y);
				136	}
				137	/* k < 0, need special care in the subnormal range. */
				138	sbits += 1022ull << 52;
				139	/* Note: sbits is signed scale. */
				140	scale = asdouble (sbits);
				141	y = scale + scale * tmp;
				142	if (fabs (y) < 1.0)
				143	{
				144	/* Round y to the right precision before scaling it into the subnormal
				145	range to avoid double rounding that can cause 0.5+E/2 ulp error where
				146	E is the worst-case ulp error outside the subnormal range. So this
				147	is only useful if the goal is better than 1 ulp worst-case error. */
				148	double_t hi, lo, one = 1.0;
				149	if (y < 0.0)
				150	one = -1.0;
				151	lo = scale - y + scale * tmp;
				152	hi = one + y;
				153	lo = one - hi + y + lo;
				154	y = eval_as_double (hi + lo) - one;
Szabolcs Nagy	e00696a	2018-06-19 13:53:40 +0100	[diff] [blame]	155	/* Fix the sign of 0. */
				156	if (y == 0.0)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	157	y = asdouble (sbits & 0x8000000000000000);
				158	/* The underflow exception needs to be signaled explicitly. */
Szabolcs Nagy	5fa69e1	2018-06-12 17:18:24 +0100	[diff] [blame]	159	force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	160	}
				161	y = 0x1p-1022 * y;
				162	return check_uflow (y);
				163	}
				164
				165	#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
				166
				167	static inline double
				168	exp_inline (double x, double xtail, uint32_t sign_bias)
				169	{
				170	uint32_t abstop;
				171	uint64_t ki, idx, top, sbits;
				172	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
				173	double_t kd, z, r, r2, scale, tail, tmp;
				174
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	175	abstop = top12 (x) & 0x7ff;
				176	if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	177	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	178	if (abstop - top12 (0x1p-54) >= 0x80000000)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	179	{
				180	/* Avoid spurious underflow for tiny x. */
				181	/* Note: 0 is common input. */
				182	double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
				183	return sign_bias ? -one : one;
				184	}
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	185	if (abstop >= top12 (1024.0))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	186	{
				187	/* Note: inf and nan are already handled. */
				188	if (asuint64 (x) >> 63)
				189	return __math_uflow (sign_bias);
				190	else
				191	return __math_oflow (sign_bias);
				192	}
				193	/* Large x is special cased below. */
				194	abstop = 0;
				195	}
				196
				197	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
				198	/* x = ln2/Nk + r, with int k and r in [-ln2/2N, ln2/2N]. /
				199	z = InvLn2N * x;
				200	#if TOINT_INTRINSICS
				201	kd = roundtoint (z);
				202	ki = converttoint (z);
				203	#elif EXP_USE_TOINT_NARROW
				204	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
				205	kd = eval_as_double (z + Shift);
				206	ki = asuint64 (kd) >> 16;
				207	kd = (double_t) (int32_t) ki;
				208	#else
				209	/* z - kd is in [-1, 1] in non-nearest rounding modes. */
				210	kd = eval_as_double (z + Shift);
				211	ki = asuint64 (kd);
				212	kd -= Shift;
				213	#endif
				214	r = x + kdNegLn2hiN + kdNegLn2loN;
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	215	/* The code assumes 2^-200 < \|xtail\| < 2^-8/N. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	216	r += xtail;
				217	/* 2^(k/N) ~= scale * (1 + tail). */
				218	idx = 2*(ki % N);
				219	top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
				220	tail = asdouble (T[idx]);
				221	/* This is only a valid scale when -1023N < k < 1024N. */
				222	sbits = T[idx + 1] + top;
				223	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
				224	/* Evaluation is optimized assuming superscalar pipelined execution. */
				225	r2 = r*r;
				226	/* Without fma the worst case error is 0.25/N ulp larger. */
				227	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
				228	#if EXP_POLY_ORDER == 4
				229	tmp = tail + r + r2C2 + rr2(C3 + rC4);
				230	#elif EXP_POLY_ORDER == 5
				231	tmp = tail + r + r2(C2 + rC3) + r2r2(C4 + r*C5);
				232	#elif EXP_POLY_ORDER == 6
				233	tmp = tail + r + r2(0.5 + rC3) + r2r2(C4 + rC5 + r2C6);
				234	#endif
				235	if (unlikely (abstop == 0))
				236	return specialcase (tmp, sbits, ki);
				237	scale = asdouble (sbits);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	238	/* Note: tmp == 0 or \|tmp\| > 2^-200 and scale > 2^-739, so there
				239	is no spurious underflow here even without fma. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	240	return scale + scale * tmp;
				241	}
				242
				243	/* Returns 0 if not int, 1 if odd int, 2 if even int. */
				244	static inline int
				245	checkint (uint64_t iy)
				246	{
				247	int e = iy >> 52 & 0x7ff;
				248	if (e < 0x3ff)
				249	return 0;
				250	if (e > 0x3ff + 52)
				251	return 2;
				252	if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
				253	return 0;
				254	if (iy & (1ULL << (0x3ff + 52 - e)))
				255	return 1;
				256	return 2;
				257	}
				258
				259	static inline int
				260	zeroinfnan (uint64_t i)
				261	{
				262	return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
				263	}
				264
				265	double
				266	pow (double x, double y)
				267	{
Szabolcs Nagy	db6e4e9	2018-06-18 11:03:27 +0100	[diff] [blame]	268	uint32_t sign_bias = 0;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	269	uint64_t ix, iy;
				270	uint32_t topx, topy;
				271
				272	ix = asuint64 (x);
				273	iy = asuint64 (y);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame]	274	topx = top12 (x);
				275	topy = top12 (y);
Szabolcs Nagy	76fd080	2018-06-22 17:28:45 +0100	[diff] [blame^]	276	if (unlikely (topx - 0x001 >= 0x7ff - 0x001
				277	\|\| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	278	{
				279	/* Note: if \|y\| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
				280	and if \|y\| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
				281	/* Special cases: (x < 0x1p-126 or inf or nan) or
				282	(\|y\| < 0x1p-65 or \|y\| >= 0x1p63 or nan). */
				283	if (unlikely (zeroinfnan (iy)))
				284	{
				285	if (2 * iy == 0)
				286	return issignaling_inline (x) ? x + y : 1.0;
				287	if (ix == asuint64 (1.0))
				288	return issignaling_inline (y) ? x + y : 1.0;
Szabolcs Nagy	76fd080	2018-06-22 17:28:45 +0100	[diff] [blame^]	289	if (2 * ix > 2 * asuint64 (INFINITY)
				290	\|\| 2 * iy > 2 * asuint64 (INFINITY))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	291	return x + y;
				292	if (2 * ix == 2 * asuint64 (1.0))
				293	return 1.0;
				294	if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
				295	return 0.0; /* \|x\|<1 && y==inf or \|x\|>1 && y==-inf. */
				296	return y * y;
				297	}
				298	if (unlikely (zeroinfnan (ix)))
				299	{
				300	double_t x2 = x * x;
				301	if (ix >> 63 && checkint (iy) == 1)
				302	{
				303	x2 = -x2;
				304	sign_bias = 1;
				305	}
				306	if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
				307	return __math_divzero (sign_bias);
Szabolcs Nagy	5fa69e1	2018-06-12 17:18:24 +0100	[diff] [blame]	308	/* Without the barrier some versions of clang hoist the 1/x2 and
				309	thus division by zero exception can be signaled spuriously. */
				310	return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	311	}
				312	/* Here x and y are non-zero finite. */
				313	if (ix >> 63)
				314	{
				315	/* Finite x < 0. */
				316	int yint = checkint (iy);
				317	if (yint == 0)
				318	return __math_invalid (x);
				319	if (yint == 1)
				320	sign_bias = SIGN_BIAS;
				321	ix &= 0x7fffffffffffffff;
				322	topx &= 0x7ff;
				323	}
				324	if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
				325	{
				326	/* Note: sign_bias == 0 here because y is not odd. */
				327	if (ix == asuint64 (1.0))
				328	return 1.0;
				329	if ((topy & 0x7ff) < 0x3be)
				330	{
				331	/* \|y\| < 2^-65, x^y ~= 1 + ylog(x). /
				332	if (WANT_ROUNDING)
				333	return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
				334	else
				335	return 1.0;
				336	}
Szabolcs Nagy	76fd080	2018-06-22 17:28:45 +0100	[diff] [blame^]	337	return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
				338	: __math_uflow (0);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	339	}
				340	if (topx == 0)
				341	{
				342	/* Normalize subnormal x so exponent becomes negative. */
				343	ix = asuint64 (x * 0x1p52);
				344	ix &= 0x7fffffffffffffff;
				345	ix -= 52ULL << 52;
				346	}
				347	}
				348
				349	double_t lo;
				350	double_t hi = log_inline (ix, &lo);
				351	double_t ehi, elo;
				352	#if HAVE_FAST_FMA
Szabolcs Nagy	76fd080	2018-06-22 17:28:45 +0100	[diff] [blame^]	353	ehi = y * hi;
				354	elo = y * lo + fma (y, hi, -ehi);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	355	#else
				356	double_t yhi = asdouble (iy & -1ULL << 27);
				357	double_t ylo = y - yhi;
				358	double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
				359	double_t llo = hi - lhi + lo;
Szabolcs Nagy	76fd080	2018-06-22 17:28:45 +0100	[diff] [blame^]	360	ehi = yhi * lhi;
				361	elo = ylo * lhi + y * llo; /* \|elo\| < \|ehi\| * 2^-25. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	362	#endif
				363	return exp_inline (ehi, elo, sign_bias);
				364	}
/* Extra symbol names for pow, emitted only when USE_GLIBC_ABI is set.  */
#if USE_GLIBC_ABI
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
#endif