Blame - src/math/expm1minus-scalar-rr2-lut16-p4.c - platform/external/XNNPACK

blob: 8a085bb6e6e3a4c9d477c961625b08de35642685 [file] [log] [blame]

Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	1	// Copyright 2020 Google LLC
				2	//
				3	// This source code is licensed under the BSD-style license found in the
				4	// LICENSE file in the root directory of this source tree.
				5
				6	#include <assert.h>
				7	#include <stddef.h>
				8
				9	#include <xnnpack/common.h>
				10	#include <xnnpack/math-stubs.h>
				11
				12	#include <fp16/bitcasts.h>
				13
				14
				15	// Table of exp2(k / 16) values decremented (as integer) by (k << 19), k = 0..15
					// The function below adds (bits(n) << 19) to the fetched entry as an integer. That shift moves the 4 index
					// bits k into bits 19:22 of the sum, so each entry is stored with (k << 19) already subtracted to cancel them.
				16	extern XNN_INTERNAL const uint32_t xnn_table_exp2minus_k_over_16[16];
				17
				18	void xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(
				19	size_t n,
				20	const float* input,
				21	float* output)
				22	{
					// Computes exp(x) - 1 for every float read from input and stores the results to output, one element per
					// loop iteration.
					//   n      - size of the input in bytes (the loop consumes sizeof(float) bytes of it per element).
					//   input  - source elements; the algorithm is designed for x in [-17.328680, 0].
					//   output - destination; receives one float per input element.
					// Inputs at or below vsat_cutoff produce exactly -1.0f.
					// Method: split x = n * log(2) + t with n rounded to 4 fractional bits, build s = 2**n from a 16-entry table
					// plus an exponent adjustment, approximate exp(t) - 1 with a degree-4 polynomial, and recombine.
					//
					// The byte count must be a multiple of 4 floats even though the loop handles a single element at a time.
					// NOTE(review): presumably this keeps the contract identical to vectorized variants - confirm.
				23	assert(n % (4 * sizeof(float)) == 0);
				24
				25	// Large number such that ulp(magic bias) == exp2(-4)
				26	const float vmagic_bias = 0x1.800000p19f;
				27	const float vlog2e = 0x1.715476p+0f;
				28	// Mask for the lowest 4 bits
				29	const uint32_t vindex_mask = UINT32_C(0xF);
				30	// The largest x for which expm1f(x) is saturated at -1.0f.
				31	const float vsat_cutoff = -0x1.154246p+4f;
Marat Dukhan	de390d4	2020-11-29 19:32:18 -0800	[diff] [blame]	32	// High and low parts of -log(2) for Cody-Waite range reduction. The last 9 mantissa bits of the high part are zeroes.
				33	const float vminus_ln2_hi = -0x1.62E400p-1f;
				34	const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	35	// Coefficients of the polynomial approximation
				36	// exp(t) - 1 ~ t * (1 + t * (c2 + t * (c3 + t * c4)))
				37	// on [-log(2)/32, log(2)/32]
				38	const float vc4 = 0x1.55563Ap-5f;
				39	const float vc3 = 0x1.555708p-3f;
				40	const float vc2 = 0x1.000000p-1f;
				41	const float vone = 1.0f;
				42
				43	for (; n != 0; n -= sizeof(float)) {
				44	float vx = *input++;
				45
				46	// Compute reduced argument n := round(x / log(2), 4).
				47	// We do it by adding a large number (magic bias), which causes rounding of the result to 4 fractional bits, then
				48	// subtracting the large number back. The trick with adding large number is valid only within certain bounds
				49	// (|x / log(2)| <= 2**18, i.e. |x| <= 0x1.62E43p+17 = 181704.375), but that is acceptable, because inputs x are
				50	// restricted to [-17.328680, 0].
				51	// Note that addition-subtraction of the large number doesn't cause overflow for inputs in this range.
				52	float vn = vx * vlog2e + vmagic_bias;
				53
				54	// Create a floating-point number s (scale) such that s := 2**n for valid inputs, i.e. -17.328680 <= x <= 0.0. As n
				55	// has 4 fractional bits, we split s == 2**n = 2**int(n) * 2**frac(n). We create s in two steps:
				56	// 1. Fetch 2**frac(n) from the table using the 4 low bits of n, as integer. Note that the fetched values are in
				57	// the [1.0, 2.0) range, i.e. their floating-point exponent is 0.
				58	// 2. Adjust fetched value by addition of int(n) to its floating-point exponent. The result is always a normalized
				59	// number, because for -17.328680 <= x <= 0.0 we have -25 <= int(n) <= 0, and thus the adjusted exponent is not
				60	// lower than -25.
				61	//
				62	// Shift bits 4:12 into 23:31 (position of floating-point exponent).
					// The 4 index bits below them land in bits 19:22; the table entries are pre-decremented by (k << 19) to
					// cancel that contribution when the two integers are added.
Marat Dukhan	ed6baaf	2020-12-01 15:07:08 -0800	[diff] [blame]	63	const uint32_t ven = fp32_to_bits(vn) << 19;
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	64
				65	// Use bits 0:4 of n, as integer, as an index for table lookup of l := 2**frac(n).
					// The mask keeps the index within 0..15, so the 16-entry table is never read out of bounds.
				66	const uint32_t vidx = fp32_to_bits(vn) & vindex_mask;
				67	// Adjust exponent of the value l fetched from the table to get the final s value.
Marat Dukhan	ed6baaf	2020-12-01 15:07:08 -0800	[diff] [blame]	68	float vs = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx] + ven);
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	69
				70	// Subtract the large number back to get final n := round(x / log(2), 4).
				71	vn -= vmagic_bias;
				72
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	73	// Compute reduced argument t := x - n * log(2).
				74	// Use Cody-Waite range reduction method (note two constants to represent log(2)) to improve accuracy.
				75	float vt = vn * vminus_ln2_hi + vx;
				76	vt = vn * vminus_ln2_lo + vt;
				77
Marat Dukhan	e332dd6	2020-12-14 14:31:54 -0800	[diff] [blame]	78	// The function saturates at -1 for large negative inputs: expm1f(x) == -1.0f for x <= sat_cutoff ~= -17.328680.
				79	// To guarantee this behaviour, we zero out s (scale) and t (reduced argument) for x <= sat_cutoff.
					// With s == 0 and t == 0 the reconstruction below evaluates to (0 + 0) + (0 - 1) == -1.0f.
				80	if XNN_UNPREDICTABLE(vx <= vsat_cutoff) {
				81	vs = 0.0f;
				82	vt = 0.0f;
				83	}
				84
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	85	// Compute degree-4 polynomial approximation for exp(t) - 1 on [-log(2)/32, log(2)/32].
				86	// P(t) = t * (1 + t * (c2 + t * (c3 + t * c4))) = t + t * (t * (c2 + t * (c3 + t * c4))) = t + t * p
					// After the three statements below, vp holds p = t * (c2 + t * (c3 + t * c4)).
				87	float vp = vc4 * vt + vc3;
				88	vp = vp * vt + vc2;
				89	vp *= vt;
				90
				91	// Reconstruct the exp(x) - 1 value:
Marat Dukhan	de390d4	2020-11-29 19:32:18 -0800	[diff] [blame]	92	// exp(x) - 1 = s * (1 + t * (1 + t * (c2 + t * (c3 + t * c4)))) - 1
Marat Dukhan	c60742b	2020-11-23 12:33:27 -0800	[diff] [blame]	93	// = (s - 1) + s * (t + t * p)
				94	// = ((t * s) + (t * s) * p) + (s - 1)
					// vt is overwritten with t * s here, so the vp update below computes (t * s) * p + (t * s).
				95	vt *= vs;
				96	const float vsm1 = vs - vone;
				97	vp = vp * vt + vt;
				98	const float vf = vp + vsm1;
				99
				100	*output++ = vf;
				101	}
				102	}