Blame - math/pow.c - platform/external/arm-optimized-routines

blob: e118737040ad66d8f3b5937143cf16d90bf2b93e [file] [log] [blame]

Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	1	/*
				2	* Double-precision x^y function.
				3	*
				4	* Copyright (c) 2018, Arm Limited.
				5	* SPDX-License-Identifier: Apache-2.0
				6	*
				7	* Licensed under the Apache License, Version 2.0 (the "License");
				8	* you may not use this file except in compliance with the License.
				9	* You may obtain a copy of the License at
				10	*
				11	* http://www.apache.org/licenses/LICENSE-2.0
				12	*
				13	* Unless required by applicable law or agreed to in writing, software
				14	* distributed under the License is distributed on an "AS IS" BASIS,
				15	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				16	* See the License for the specific language governing permissions and
				17	* limitations under the License.
				18	*/
				19
				20	#include <math.h>
				21	#include <stdint.h>
				22	#include "math_config.h"
				23
				24	/*
				25	Worst-case error: 0.67 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
				26	relerr_log: 1.8 * 2^-66 (Relative error of log)
				27	ulperr_exp: 0.509 ULP (ULP error of exp)
				28	*/
				29
				30	#define T __pow_log_data.tab
				31	#define B __pow_log_data.poly1
				32	#define A __pow_log_data.poly
				33	#define Ln2hi __pow_log_data.ln2hi
				34	#define Ln2lo __pow_log_data.ln2lo
				35	#define N (1 << POW_LOG_TABLE_BITS)
				36	#define OFF 0x3fe6000000000000
				37
				38	static inline uint32_t
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	39	top12 (double x)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	40	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	41	return asuint64 (x) >> 52;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	42	}
				43
				44	static inline double_t
				45	log_inline (uint64_t ix, double_t *tail)
				46	{
				47	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
				48	double_t w, z, zhi, zlo, r, r2, r3, y, invc, logc, kd, hi, lo, khi, rhi, rlo, p, q;
				49	uint64_t iz, tmp;
				50	int k, i;
				51
				52	#if POW_LOG_POLY1_ORDER == 9
				53	# define LO asuint64 (1 - 0x1.1p-7)
				54	# define HI asuint64 (1 + 0x1.98p-7)
				55	#endif
				56	if (unlikely (ix - LO < HI - LO))
				57	{
				58	r = asdouble (ix) - 1.0;
				59	/* Split r into top and bottom half. */
				60	w = r * 0x1p27;
				61	rhi = r + w - w;
				62	rlo = r - rhi;
				63	/* Compute r - rr/2 precisely into hi+lo. /
				64	w = rhirhiB[0]; /* B[0] == -0.5. */
				65	hi = r + w;
				66	lo = r - hi + w;
				67	lo += B[0]rlo(rhi + r);
				68	r2 = r*r;
				69	r3 = r*r2;
				70	#if POW_LOG_POLY1_ORDER == 9
				71	p = B[1] + r(B[2] + rB[3] + r2B[4] + r3(B[5] + rB[6] + r2B[7]));
				72	#endif
				73	q = lo + r3*p;
				74	y = hi + q;
				75	*tail = (hi - y) + q;
				76	return y;
				77	}
				78
				79	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
				80	The range is split into N subintervals.
				81	The ith subinterval contains z and c is near its center. */
				82	tmp = ix - OFF;
				83	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
				84	k = (int64_t) tmp >> 52; /* arithmetic shift */
				85	iz = ix - (tmp & 0xfffULL << 52);
				86	invc = T[i].invc;
				87	logc = T[i].logc;
				88	z = asdouble (iz);
				89	zhi = asdouble ((iz + (1ULL<<31)) & (-1ULL << 32));
				90	zlo = z - zhi;
				91
				92	/* log(x) = log1p(z/c-1) + log(c) + kLn2 /
				93	rhi = zhi * invc - 1.0;
				94	rlo = zlo * invc;
				95	kd = (double_t) k;
				96
				97	/* hi + lo = r + log(c) + kLn2. /
				98	khi = kd * Ln2hi;
				99	w = khi + logc;
				100	lo = khi - w + logc;
				101	hi = w + rhi;
				102	lo = w - hi + rhi + (lo + kd*Ln2lo) + rlo;
				103
				104	/* log(x) = lo + (log1p(r) - r) + hi. */
				105	/* Evaluation is optimized assuming superscalar pipelined execution. */
				106	#if HAVE_FAST_FMA
				107	r = fma (z, invc, -1.0);
				108	#else
				109	r = rhi + rlo;
				110	#endif
				111	r2 = r * r;
				112
				113	#if POW_LOG_POLY_ORDER == 7
				114	p = lo + rr2(A[1] + rA[2] + r2(A[3] + rA[4] + r2A[5]));
				115	#endif
				116	q = A[0]r2; / A[0] == -0.5. */
				117	w = q + hi;
				118	p += hi - w + q;
				119	y = p + w;
				120	*tail = w - y + p;
				121	return y;
				122	}
				123
				124	#undef N
				125	#undef T
				126	#define N (1 << EXP_TABLE_BITS)
				127	#define InvLn2N __exp_data.invln2N
				128	#define NegLn2hiN __exp_data.negln2hiN
				129	#define NegLn2loN __exp_data.negln2loN
				130	#define Shift __exp_data.shift
				131	#define T __exp_data.tab
				132	#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
				133	#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
				134	#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
				135	#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
				136	#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
				137
				138	static inline double
				139	specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
				140	{
				141	double_t scale, y;
				142
				143	if ((ki & 0x80000000) == 0)
				144	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	145	/* k > 0, the exponent of scale might have overflowed by <= 460. */
				146	sbits -= 1009ull << 52;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	147	scale = asdouble (sbits);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	148	y = 0x1p1009 * (scale + scale * tmp);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	149	return check_oflow (y);
				150	}
				151	/* k < 0, need special care in the subnormal range. */
				152	sbits += 1022ull << 52;
				153	/* Note: sbits is signed scale. */
				154	scale = asdouble (sbits);
				155	y = scale + scale * tmp;
				156	if (fabs (y) < 1.0)
				157	{
				158	/* Round y to the right precision before scaling it into the subnormal
				159	range to avoid double rounding that can cause 0.5+E/2 ulp error where
				160	E is the worst-case ulp error outside the subnormal range. So this
				161	is only useful if the goal is better than 1 ulp worst-case error. */
				162	double_t hi, lo, one = 1.0;
				163	if (y < 0.0)
				164	one = -1.0;
				165	lo = scale - y + scale * tmp;
				166	hi = one + y;
				167	lo = one - hi + y + lo;
				168	y = eval_as_double (hi + lo) - one;
				169	/* Avoid -0.0 with downward rounding. */
				170	if (WANT_ROUNDING && y == 0.0)
				171	y = asdouble (sbits & 0x8000000000000000);
				172	/* The underflow exception needs to be signaled explicitly. */
Szabolcs Nagy	5fa69e1	2018-06-12 17:18:24 +0100	[diff] [blame]	173	force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	174	}
				175	y = 0x1p-1022 * y;
				176	return check_uflow (y);
				177	}
				178
				179	#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
				180
				181	static inline double
				182	exp_inline (double x, double xtail, uint32_t sign_bias)
				183	{
				184	uint32_t abstop;
				185	uint64_t ki, idx, top, sbits;
				186	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
				187	double_t kd, z, r, r2, scale, tail, tmp;
				188
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	189	abstop = top12 (x) & 0x7ff;
				190	if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	191	{
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	192	if (abstop - top12 (0x1p-54) >= 0x80000000)
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	193	{
				194	/* Avoid spurious underflow for tiny x. */
				195	/* Note: 0 is common input. */
				196	double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
				197	return sign_bias ? -one : one;
				198	}
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	199	if (abstop >= top12 (1024.0))
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	200	{
				201	/* Note: inf and nan are already handled. */
				202	if (asuint64 (x) >> 63)
				203	return __math_uflow (sign_bias);
				204	else
				205	return __math_oflow (sign_bias);
				206	}
				207	/* Large x is special cased below. */
				208	abstop = 0;
				209	}
				210
				211	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
				212	/* x = ln2/Nk + r, with int k and r in [-ln2/2N, ln2/2N]. /
				213	z = InvLn2N * x;
				214	#if TOINT_INTRINSICS
				215	kd = roundtoint (z);
				216	ki = converttoint (z);
				217	#elif EXP_USE_TOINT_NARROW
				218	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
				219	kd = eval_as_double (z + Shift);
				220	ki = asuint64 (kd) >> 16;
				221	kd = (double_t) (int32_t) ki;
				222	#else
				223	/* z - kd is in [-1, 1] in non-nearest rounding modes. */
				224	kd = eval_as_double (z + Shift);
				225	ki = asuint64 (kd);
				226	kd -= Shift;
				227	#endif
				228	r = x + kdNegLn2hiN + kdNegLn2loN;
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	229	/* The code assumes 2^-200 < \|xtail\| < 2^-8/N. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	230	r += xtail;
				231	/* 2^(k/N) ~= scale * (1 + tail). */
				232	idx = 2*(ki % N);
				233	top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
				234	tail = asdouble (T[idx]);
				235	/* This is only a valid scale when -1023N < k < 1024N. */
				236	sbits = T[idx + 1] + top;
				237	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
				238	/* Evaluation is optimized assuming superscalar pipelined execution. */
				239	r2 = r*r;
				240	/* Without fma the worst case error is 0.25/N ulp larger. */
				241	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
				242	#if EXP_POLY_ORDER == 4
				243	tmp = tail + r + r2C2 + rr2(C3 + rC4);
				244	#elif EXP_POLY_ORDER == 5
				245	tmp = tail + r + r2(C2 + rC3) + r2r2(C4 + r*C5);
				246	#elif EXP_POLY_ORDER == 6
				247	tmp = tail + r + r2(0.5 + rC3) + r2r2(C4 + rC5 + r2C6);
				248	#endif
				249	if (unlikely (abstop == 0))
				250	return specialcase (tmp, sbits, ki);
				251	scale = asdouble (sbits);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	252	/* Note: tmp == 0 or \|tmp\| > 2^-200 and scale > 2^-739, so there
				253	is no spurious underflow here even without fma. */
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	254	return scale + scale * tmp;
				255	}
				256
				257	/* Returns 0 if not int, 1 if odd int, 2 if even int. */
				258	static inline int
				259	checkint (uint64_t iy)
				260	{
				261	int e = iy >> 52 & 0x7ff;
				262	if (e < 0x3ff)
				263	return 0;
				264	if (e > 0x3ff + 52)
				265	return 2;
				266	if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
				267	return 0;
				268	if (iy & (1ULL << (0x3ff + 52 - e)))
				269	return 1;
				270	return 2;
				271	}
				272
				273	static inline int
				274	zeroinfnan (uint64_t i)
				275	{
				276	return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
				277	}
				278
				279	double
				280	pow (double x, double y)
				281	{
				282	uint64_t sign_bias = 0;
				283	uint64_t ix, iy;
				284	uint32_t topx, topy;
				285
				286	ix = asuint64 (x);
				287	iy = asuint64 (y);
Szabolcs Nagy	2117b83	2018-06-14 10:54:06 +0100	[diff] [blame^]	288	topx = top12 (x);
				289	topy = top12 (y);
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	290	if (unlikely (topx - 0x001 >= 0x7ff - 0x001 \|\| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
				291	{
				292	/* Note: if \|y\| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
				293	and if \|y\| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
				294	/* Special cases: (x < 0x1p-126 or inf or nan) or
				295	(\|y\| < 0x1p-65 or \|y\| >= 0x1p63 or nan). */
				296	if (unlikely (zeroinfnan (iy)))
				297	{
				298	if (2 * iy == 0)
				299	return issignaling_inline (x) ? x + y : 1.0;
				300	if (ix == asuint64 (1.0))
				301	return issignaling_inline (y) ? x + y : 1.0;
				302	if (2 * ix > 2 * asuint64 (INFINITY) \|\| 2 * iy > 2 * asuint64 (INFINITY))
				303	return x + y;
				304	if (2 * ix == 2 * asuint64 (1.0))
				305	return 1.0;
				306	if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
				307	return 0.0; /* \|x\|<1 && y==inf or \|x\|>1 && y==-inf. */
				308	return y * y;
				309	}
				310	if (unlikely (zeroinfnan (ix)))
				311	{
				312	double_t x2 = x * x;
				313	if (ix >> 63 && checkint (iy) == 1)
				314	{
				315	x2 = -x2;
				316	sign_bias = 1;
				317	}
				318	if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
				319	return __math_divzero (sign_bias);
Szabolcs Nagy	5fa69e1	2018-06-12 17:18:24 +0100	[diff] [blame]	320	/* Without the barrier some versions of clang hoist the 1/x2 and
				321	thus division by zero exception can be signaled spuriously. */
				322	return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	323	}
				324	/* Here x and y are non-zero finite. */
				325	if (ix >> 63)
				326	{
				327	/* Finite x < 0. */
				328	int yint = checkint (iy);
				329	if (yint == 0)
				330	return __math_invalid (x);
				331	if (yint == 1)
				332	sign_bias = SIGN_BIAS;
				333	ix &= 0x7fffffffffffffff;
				334	topx &= 0x7ff;
				335	}
				336	if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
				337	{
				338	/* Note: sign_bias == 0 here because y is not odd. */
				339	if (ix == asuint64 (1.0))
				340	return 1.0;
				341	if ((topy & 0x7ff) < 0x3be)
				342	{
				343	/* \|y\| < 2^-65, x^y ~= 1 + ylog(x). /
				344	if (WANT_ROUNDING)
				345	return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
				346	else
				347	return 1.0;
				348	}
				349	return (ix > asuint64 (1.0)) == (topy < 0x800)
				350	? __math_oflow (0) : __math_uflow (0);
				351	}
				352	if (topx == 0)
				353	{
				354	/* Normalize subnormal x so exponent becomes negative. */
				355	ix = asuint64 (x * 0x1p52);
				356	ix &= 0x7fffffffffffffff;
				357	ix -= 52ULL << 52;
				358	}
				359	}
				360
				361	double_t lo;
				362	double_t hi = log_inline (ix, &lo);
				363	double_t ehi, elo;
				364	#if HAVE_FAST_FMA
				365	ehi = y*hi;
				366	elo = y*lo + fma (y, hi, -ehi);
				367	#else
				368	double_t yhi = asdouble (iy & -1ULL << 27);
				369	double_t ylo = y - yhi;
				370	double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
				371	double_t llo = hi - lhi + lo;
				372	ehi = yhi*lhi;
				373	elo = ylolhi + yllo; /* \|elo\| < \|ehi\| * 2^-25. */
				374	#endif
				375	return exp_inline (ehi, elo, sign_bias);
				376	}
/* When building with the glibc ABI, additionally expose pow under the
   names __pow_finite and __ieee754_pow.  strong_alias and hidden_alias
   are not defined in this file (presumably they come from
   math_config.h).  */
#if USE_GLIBC_ABI
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
#endif