Blame - math/pow.c - platform/external/arm-optimized-routines

blob: 1164ce1979a37ad7becdf45d89c7aae3f2a1fee1 [file] [log] [blame]

Szabolcs Nagy	ed0ecff	2018-06-06 18:17:16 +0100	[diff] [blame]	1	/*
				2	* Double-precision x^y function.
				3	*
				4	* Copyright (c) 2018, Arm Limited.
				5	* SPDX-License-Identifier: Apache-2.0
				6	*
				7	* Licensed under the Apache License, Version 2.0 (the "License");
				8	* you may not use this file except in compliance with the License.
				9	* You may obtain a copy of the License at
				10	*
				11	* http://www.apache.org/licenses/LICENSE-2.0
				12	*
				13	* Unless required by applicable law or agreed to in writing, software
				14	* distributed under the License is distributed on an "AS IS" BASIS,
				15	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				16	* See the License for the specific language governing permissions and
				17	* limitations under the License.
				18	*/
				19
				20	#include <math.h>
				21	#include <stdint.h>
				22	#include "math_config.h"
				23
				24	/*
				25	Worst-case error: 0.67 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
				26	relerr_log: 1.8 * 2^-66 (Relative error of log)
				27	ulperr_exp: 0.509 ULP (ULP error of exp)
				28	*/
				29
				30	#define T __pow_log_data.tab /* Table indexed by i in log_inline; each entry supplies invc and logc.  */
				31	#define B __pow_log_data.poly1 /* Coefficients used by log_inline when x is close to 1; B[0] == -0.5.  */
				32	#define A __pow_log_data.poly /* Coefficients used by log_inline on the table path; A[0] == -0.5.  */
				33	#define Ln2hi __pow_log_data.ln2hi /* Multiplied by k in log_inline to form the leading part of k*Ln2.  */
				34	#define Ln2lo __pow_log_data.ln2lo /* Multiplied by k in log_inline to form the trailing part of k*Ln2.  */
				35	#define N (1 << POW_LOG_TABLE_BITS) /* Modulus applied to the table index in log_inline.  */
				36	#define OFF 0x3fe6000000000000 /* Bit pattern subtracted from ix so that z falls in [OFF,2*OFF).  */
				37
				/* Return the 16 most significant bits of the 64-bit pattern that
				   asuint64 yields for x.  */
				static inline uint32_t
				top16 (double x)
				{
				  uint64_t bits = asuint64 (x);
				  return (uint32_t) (bits >> 48);
				}
				43
				44	static inline double_t
				45	log_inline (uint64_t ix, double_t *tail)
				46	{
				47	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
				48	double_t w, z, zhi, zlo, r, r2, r3, y, invc, logc, kd, hi, lo, khi, rhi, rlo, p, q;
				49	uint64_t iz, tmp;
				50	int k, i;
				51
				52	#if POW_LOG_POLY1_ORDER == 9
				53	# define LO asuint64 (1 - 0x1.1p-7)
				54	# define HI asuint64 (1 + 0x1.98p-7)
				55	#endif
				56	if (unlikely (ix - LO < HI - LO))
				57	{
				58	r = asdouble (ix) - 1.0;
				59	/* Split r into top and bottom half. */
				60	w = r * 0x1p27;
				61	rhi = r + w - w;
				62	rlo = r - rhi;
				63	/* Compute r - rr/2 precisely into hi+lo. /
				64	w = rhirhiB[0]; /* B[0] == -0.5. */
				65	hi = r + w;
				66	lo = r - hi + w;
				67	lo += B[0]rlo(rhi + r);
				68	r2 = r*r;
				69	r3 = r*r2;
				70	#if POW_LOG_POLY1_ORDER == 9
				71	p = B[1] + r(B[2] + rB[3] + r2B[4] + r3(B[5] + rB[6] + r2B[7]));
				72	#endif
				73	q = lo + r3*p;
				74	y = hi + q;
				75	*tail = (hi - y) + q;
				76	return y;
				77	}
				78
				79	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
				80	The range is split into N subintervals.
				81	The ith subinterval contains z and c is near its center. */
				82	tmp = ix - OFF;
				83	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
				84	k = (int64_t) tmp >> 52; /* arithmetic shift */
				85	iz = ix - (tmp & 0xfffULL << 52);
				86	invc = T[i].invc;
				87	logc = T[i].logc;
				88	z = asdouble (iz);
				89	zhi = asdouble ((iz + (1ULL<<31)) & (-1ULL << 32));
				90	zlo = z - zhi;
				91
				92	/* log(x) = log1p(z/c-1) + log(c) + kLn2 /
				93	rhi = zhi * invc - 1.0;
				94	rlo = zlo * invc;
				95	kd = (double_t) k;
				96
				97	/* hi + lo = r + log(c) + kLn2. /
				98	khi = kd * Ln2hi;
				99	w = khi + logc;
				100	lo = khi - w + logc;
				101	hi = w + rhi;
				102	lo = w - hi + rhi + (lo + kd*Ln2lo) + rlo;
				103
				104	/* log(x) = lo + (log1p(r) - r) + hi. */
				105	/* Evaluation is optimized assuming superscalar pipelined execution. */
				106	#if HAVE_FAST_FMA
				107	r = fma (z, invc, -1.0);
				108	#else
				109	r = rhi + rlo;
				110	#endif
				111	r2 = r * r;
				112
				113	#if POW_LOG_POLY_ORDER == 7
				114	p = lo + rr2(A[1] + rA[2] + r2(A[3] + rA[4] + r2A[5]));
				115	#endif
				116	q = A[0]r2; / A[0] == -0.5. */
				117	w = q + hi;
				118	p += hi - w + q;
				119	y = p + w;
				120	*tail = w - y + p;
				121	return y;
				122	}
				123
				124	#undef N /* N and T are redefined below for the exp tables.  */
				125	#undef T
				126	#define N (1 << EXP_TABLE_BITS) /* Modulus applied to ki when indexing T in exp_inline.  */
				127	#define InvLn2N __exp_data.invln2N /* Multiplied by x in exp_inline to obtain z.  */
				128	#define NegLn2hiN __exp_data.negln2hiN /* Multiplied by kd and added to x to form r (leading term).  */
				129	#define NegLn2loN __exp_data.negln2loN /* Multiplied by kd and added to x to form r (trailing term).  */
				130	#define Shift __exp_data.shift /* Added to z, then removed again, when TOINT_INTRINSICS is off.  */
				131	#define T __exp_data.tab /* exp_inline reads T[idx] as the tail bits and T[idx + 1] as the scale bits.  */
				132	#define C2 __exp_data.poly[5 - EXP_POLY_ORDER] /* C2..C6: coefficients of the polynomial in exp_inline.  */
				133	#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
				134	#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
				135	#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
				136	#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
				137
				138	static inline double
				139	specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
				140	{
				141	double_t scale, y;
				142
				143	if ((ki & 0x80000000) == 0)
				144	{
				145	/* k > 0, the exponent of scale might have overflowed by <= 85. */
				146	sbits -= 97ull << 52;
				147	scale = asdouble (sbits);
				148	y = 0x1p97 * (scale + scale * tmp);
				149	return check_oflow (y);
				150	}
				151	/* k < 0, need special care in the subnormal range. */
				152	sbits += 1022ull << 52;
				153	/* Note: sbits is signed scale. */
				154	scale = asdouble (sbits);
				155	y = scale + scale * tmp;
				156	if (fabs (y) < 1.0)
				157	{
				158	/* Round y to the right precision before scaling it into the subnormal
				159	range to avoid double rounding that can cause 0.5+E/2 ulp error where
				160	E is the worst-case ulp error outside the subnormal range. So this
				161	is only useful if the goal is better than 1 ulp worst-case error. */
				162	double_t hi, lo, one = 1.0;
				163	if (y < 0.0)
				164	one = -1.0;
				165	lo = scale - y + scale * tmp;
				166	hi = one + y;
				167	lo = one - hi + y + lo;
				168	y = eval_as_double (hi + lo) - one;
				169	/* Avoid -0.0 with downward rounding. */
				170	if (WANT_ROUNDING && y == 0.0)
				171	y = asdouble (sbits & 0x8000000000000000);
				172	/* The underflow exception needs to be signaled explicitly. */
				173	force_eval_double (0x1p-1022 * 0x1p-1022);
				174	}
				175	y = 0x1p-1022 * y;
				176	return check_uflow (y);
				177	}
				178
				179	#define SIGN_BIAS (0x800 << EXP_TABLE_BITS) /* Added to ki in exp_inline; after the shift by 52 - EXP_TABLE_BITS it becomes bit 63 of the scale.  */
				180
				181	static inline double
				182	exp_inline (double x, double xtail, uint32_t sign_bias)
				183	{
				184	uint32_t abstop;
				185	uint64_t ki, idx, top, sbits;
				186	/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
				187	double_t kd, z, r, r2, scale, tail, tmp;
				188
				189	abstop = top16 (x) & 0x7fff;
				190	if (unlikely (abstop - top16 (0x1p-54) >= top16 (704.0) - top16 (0x1p-54)))
				191	{
				192	if (abstop - top16 (0x1p-54) >= 0x80000000)
				193	{
				194	/* Avoid spurious underflow for tiny x. */
				195	/* Note: 0 is common input. */
				196	double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
				197	return sign_bias ? -one : one;
				198	}
				199	if (abstop >= top16 (768.0))
				200	{
				201	/* Note: inf and nan are already handled. */
				202	if (asuint64 (x) >> 63)
				203	return __math_uflow (sign_bias);
				204	else
				205	return __math_oflow (sign_bias);
				206	}
				207	/* Large x is special cased below. */
				208	abstop = 0;
				209	}
				210
				211	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
				212	/* x = ln2/Nk + r, with int k and r in [-ln2/2N, ln2/2N]. /
				213	z = InvLn2N * x;
				214	#if TOINT_INTRINSICS
				215	kd = roundtoint (z);
				216	ki = converttoint (z);
				217	#elif EXP_USE_TOINT_NARROW
				218	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
				219	kd = eval_as_double (z + Shift);
				220	ki = asuint64 (kd) >> 16;
				221	kd = (double_t) (int32_t) ki;
				222	#else
				223	/* z - kd is in [-1, 1] in non-nearest rounding modes. */
				224	kd = eval_as_double (z + Shift);
				225	ki = asuint64 (kd);
				226	kd -= Shift;
				227	#endif
				228	r = x + kdNegLn2hiN + kdNegLn2loN;
				229	r += xtail;
				230	/* 2^(k/N) ~= scale * (1 + tail). */
				231	idx = 2*(ki % N);
				232	top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
				233	tail = asdouble (T[idx]);
				234	/* This is only a valid scale when -1023N < k < 1024N. */
				235	sbits = T[idx + 1] + top;
				236	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
				237	/* Evaluation is optimized assuming superscalar pipelined execution. */
				238	r2 = r*r;
				239	/* Without fma the worst case error is 0.25/N ulp larger. */
				240	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
				241	#if EXP_POLY_ORDER == 4
				242	tmp = tail + r + r2C2 + rr2(C3 + rC4);
				243	#elif EXP_POLY_ORDER == 5
				244	tmp = tail + r + r2(C2 + rC3) + r2r2(C4 + r*C5);
				245	#elif EXP_POLY_ORDER == 6
				246	tmp = tail + r + r2(0.5 + rC3) + r2r2(C4 + rC5 + r2C6);
				247	#endif
				248	if (unlikely (abstop == 0))
				249	return specialcase (tmp, sbits, ki);
				250	scale = asdouble (sbits);
				251	return scale + scale * tmp;
				252	}
				253
				/* Classify the double whose bit pattern is iy (the sign bit is ignored):
				   returns 0 if it is not an integer, 1 if it is an odd integer and 2 if
				   it is an even integer.  */
				static inline int
				checkint (uint64_t iy)
				{
				  const int bias = 0x3ff;
				  const int mant_bits = 52;
				  int expo = (int) ((iy >> mant_bits) & 0x7ff);

				  /* Magnitude below 1: not an integer.  */
				  if (expo < bias)
				    return 0;
				  /* Exponent beyond the mantissa width: reported as even.  */
				  if (expo > bias + mant_bits)
				    return 2;

				  /* unit is the bit with weight 1; any bit below it is fractional.  */
				  uint64_t unit = 1ULL << (bias + mant_bits - expo);
				  if (iy & (unit - 1))
				    return 0;
				  return (iy & unit) ? 1 : 2;
				}
				269
				/* Return non-zero when the double with bit pattern i is zero, infinite or
				   nan.  Doubling drops the sign bit; subtracting 1 wraps zero around to
				   the largest value so that a single unsigned comparison covers it.  */
				static inline int
				zeroinfnan (uint64_t i)
				{
				  uint64_t inf2 = 2 * asuint64 (INFINITY);
				  uint64_t mag2 = 2 * i;
				  return mag2 - 1 >= inf2 - 1;
				}
				275
				276	double
				277	pow (double x, double y)
				278	{
				279	uint64_t sign_bias = 0;
				280	uint64_t ix, iy;
				281	uint32_t topx, topy;
				282
				283	ix = asuint64 (x);
				284	iy = asuint64 (y);
				285	topx = ix >> 52;
				286	topy = iy >> 52;
				287	if (unlikely (topx - 0x001 >= 0x7ff - 0x001 \|\| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
				288	{
				289	/* Note: if \|y\| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
				290	and if \|y\| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
				291	/* Special cases: (x < 0x1p-126 or inf or nan) or
				292	(\|y\| < 0x1p-65 or \|y\| >= 0x1p63 or nan). */
				293	if (unlikely (zeroinfnan (iy)))
				294	{
				295	if (2 * iy == 0)
				296	return issignaling_inline (x) ? x + y : 1.0;
				297	if (ix == asuint64 (1.0))
				298	return issignaling_inline (y) ? x + y : 1.0;
				299	if (2 * ix > 2 * asuint64 (INFINITY) \|\| 2 * iy > 2 * asuint64 (INFINITY))
				300	return x + y;
				301	if (2 * ix == 2 * asuint64 (1.0))
				302	return 1.0;
				303	if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
				304	return 0.0; /* \|x\|<1 && y==inf or \|x\|>1 && y==-inf. */
				305	return y * y;
				306	}
				307	if (unlikely (zeroinfnan (ix)))
				308	{
				309	double_t x2 = x * x;
				310	if (ix >> 63 && checkint (iy) == 1)
				311	{
				312	x2 = -x2;
				313	sign_bias = 1;
				314	}
				315	if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
				316	return __math_divzero (sign_bias);
				317	return iy >> 63 ? 1 / x2 : x2;
				318	}
				319	/* Here x and y are non-zero finite. */
				320	if (ix >> 63)
				321	{
				322	/* Finite x < 0. */
				323	int yint = checkint (iy);
				324	if (yint == 0)
				325	return __math_invalid (x);
				326	if (yint == 1)
				327	sign_bias = SIGN_BIAS;
				328	ix &= 0x7fffffffffffffff;
				329	topx &= 0x7ff;
				330	}
				331	if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
				332	{
				333	/* Note: sign_bias == 0 here because y is not odd. */
				334	if (ix == asuint64 (1.0))
				335	return 1.0;
				336	if ((topy & 0x7ff) < 0x3be)
				337	{
				338	/* \|y\| < 2^-65, x^y ~= 1 + ylog(x). /
				339	if (WANT_ROUNDING)
				340	return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
				341	else
				342	return 1.0;
				343	}
				344	return (ix > asuint64 (1.0)) == (topy < 0x800)
				345	? __math_oflow (0) : __math_uflow (0);
				346	}
				347	if (topx == 0)
				348	{
				349	/* Normalize subnormal x so exponent becomes negative. */
				350	ix = asuint64 (x * 0x1p52);
				351	ix &= 0x7fffffffffffffff;
				352	ix -= 52ULL << 52;
				353	}
				354	}
				355
				356	double_t lo;
				357	double_t hi = log_inline (ix, &lo);
				358	double_t ehi, elo;
				359	#if HAVE_FAST_FMA
				360	ehi = y*hi;
				361	elo = y*lo + fma (y, hi, -ehi);
				362	#else
				363	double_t yhi = asdouble (iy & -1ULL << 27);
				364	double_t ylo = y - yhi;
				365	double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
				366	double_t llo = hi - lhi + lo;
				367	ehi = yhi*lhi;
				368	elo = ylolhi + yllo; /* \|elo\| < \|ehi\| * 2^-25. */
				369	#endif
				370	return exp_inline (ehi, elo, sign_bias);
				371	}
Szabolcs Nagy	b7d568d	2018-06-06 12:26:56 +0100	[diff] [blame]	372	#if USE_GLIBC_ABI /* NOTE(review): presumably exports pow under the extra glibc symbol names below; confirm against the alias macro definitions.  */
				373	strong_alias (pow, __pow_finite)
				374	hidden_alias (pow, __ieee754_pow)
				375	#endif /* USE_GLIBC_ABI */