Blame - celt/mathops.c - platform/external/libopus

blob: 21a01f52e4321e0ff20aef9b440f67269889f188 [file] [log] [blame]

Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	1	/* Copyright (c) 2002-2008 Jean-Marc Valin
				2	Copyright (c) 2007-2008 CSIRO
				3	Copyright (c) 2007-2009 Xiph.Org Foundation
				4	Written by Jean-Marc Valin */
				5	/**
				6	@file mathops.h
				7	@brief Various math functions
				8	*/
				9	/*
				10	Redistribution and use in source and binary forms, with or without
				11	modification, are permitted provided that the following conditions
				12	are met:
				13
				14	- Redistributions of source code must retain the above copyright
				15	notice, this list of conditions and the following disclaimer.
				16
				17	- Redistributions in binary form must reproduce the above copyright
				18	notice, this list of conditions and the following disclaimer in the
				19	documentation and/or other materials provided with the distribution.
				20
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	21	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				22	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				23	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Jean-Marc Valin	cb05e7c	2012-04-20 16:40:24 -0400	[diff] [blame]	24	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
				25	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	26	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				27	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				28	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				29	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				30	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				31	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				32	*/
				33
				34	#ifdef HAVE_CONFIG_H
				35	#include "config.h"
				36	#endif
				37
				38	#include "mathops.h"
				39
Timothy B. Terriberry	68242ac	2010-07-27 15:09:51 -0700	[diff] [blame]	40	/*Compute floor(sqrt(_val)) with exact arithmetic.
				41	This has been tested on all possible 32-bit inputs.*/
Jean-Marc Valin	d77d6a5	2011-07-29 17:33:06 -0400	[diff] [blame]	42	unsigned isqrt32(opus_uint32 _val){
Timothy B. Terriberry	68242ac	2010-07-27 15:09:51 -0700	[diff] [blame]	43	unsigned b;
				44	unsigned g;
				45	int bshift;
				46	/*Uses the second method from
				47	http://www.azillionmonkeys.com/qed/sqroot.html
				48	The main idea is to search for the largest binary digit b such that
				49	(g+b)(g+b) <= _val, and add it to the solution g./
				50	g=0;
Gregory Maxwell	75d2780	2011-08-30 14:02:41 -0400	[diff] [blame]	51	bshift=(EC_ILOG(_val)-1)>>1;
Timothy B. Terriberry	68242ac	2010-07-27 15:09:51 -0700	[diff] [blame]	52	b=1U<<bshift;
				53	do{
Jean-Marc Valin	d77d6a5	2011-07-29 17:33:06 -0400	[diff] [blame]	54	opus_uint32 t;
Gregory Maxwell	75d2780	2011-08-30 14:02:41 -0400	[diff] [blame]	55	t=(((opus_uint32)g<<1)+b)<<bshift;
Timothy B. Terriberry	68242ac	2010-07-27 15:09:51 -0700	[diff] [blame]	56	if(t<=_val){
				57	g+=b;
				58	_val-=t;
				59	}
				60	b>>=1;
				61	bshift--;
				62	}
				63	while(bshift>=0);
				64	return g;
				65	}
				66
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	67	#ifdef FIXED_POINT
				68
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	69	opus_val32 frac_div32(opus_val32 a, opus_val32 b)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	70	{
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	71	opus_val16 rcp;
				72	opus_val32 result, rem;
Jean-Marc Valin	1ad93cf	2010-11-06 22:02:32 -0400	[diff] [blame]	73	int shift = celt_ilog2(b)-29;
				74	a = VSHR32(a,shift);
				75	b = VSHR32(b,shift);
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	76	/* 16-bit reciprocal */
Jean-Marc Valin	35095c6	2010-11-04 13:24:44 -0400	[diff] [blame]	77	rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
Jean-Marc Valin	367c394	2012-04-12 11:07:21 -0400	[diff] [blame]	78	result = MULT16_32_Q15(rcp, a);
				79	rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
				80	result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
				81	if (result >= 536870912) /* 2^29 */
				82	return 2147483647; /* 2^31 - 1 */
				83	else if (result <= -536870912) /* -2^29 */
				84	return -2147483647; /* -2^31 */
				85	else
				86	return SHL32(result, 2);
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	87	}
				88
				89	/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	90	opus_val16 celt_rsqrt_norm(opus_val32 x)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	91	{
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	92	opus_val16 n;
				93	opus_val16 r;
				94	opus_val16 r2;
				95	opus_val16 y;
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	96	/* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
				97	n = x-32768;
				98	/* Get a rough initial guess for the root.
				99	The optimal minimax quadratic approximation (using relative error) is
				100	r = 1.437799046117536+n(-0.823394375837328+n0.4096419668459485).
				101	Coefficients here, and the final result r, are Q14.*/
				102	r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
				103	/* We want y = xrr-1 in Q15, but x is 32-bit Q16 and r is Q14.
				104	We can compute the result from n and r using Q15 multiplies with some
				105	adjustment, carefully done to avoid overflow.
				106	Range of y is [-1564,1594]. */
				107	r2 = MULT16_16_Q15(r, r);
				108	y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
				109	/* Apply a 2nd-order Householder iteration: r += ry(y*0.375-0.5).
				110	This yields the Q14 reciprocal square root of the Q16 x, with a maximum
				111	relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
				112	peak absolute error of 2.26591/16384. */
				113	return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
				114	SUB16(MULT16_16_Q15(y, 12288), 16384))));
				115	}
				116
				117	/** Sqrt approximation (QX input, QX/2 output) */
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	118	opus_val32 celt_sqrt(opus_val32 x)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	119	{
				120	int k;
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	121	opus_val16 n;
				122	opus_val32 rt;
				123	static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	124	if (x==0)
				125	return 0;
Jean-Marc Valin	c3273ed	2012-11-29 16:51:06 -0500	[diff] [blame]	126	else if (x>=1073741824)
				127	return 32767;
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	128	k = (celt_ilog2(x)>>1)-7;
Jean-Marc Valin	0da4f23	2011-09-25 00:11:46 -0400	[diff] [blame]	129	x = VSHR32(x, 2*k);
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	130	n = x-32768;
				131	rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
				132	MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
				133	rt = VSHR32(rt,7-k);
				134	return rt;
				135	}
				136
				137	#define L1 32767
				138	#define L2 -7651
				139	#define L3 8277
				140	#define L4 -626
				141
Gregory Maxwell	7830cf1	2013-10-17 15:56:52 -0700	[diff] [blame]	142	static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	143	{
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	144	opus_val16 x2;
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	145
				146	x2 = MULT16_16_P15(x,x);
				147	return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
				148	))))))));
				149	}
				150
				151	#undef L1
				152	#undef L2
				153	#undef L3
				154	#undef L4
				155
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	156	opus_val16 celt_cos_norm(opus_val32 x)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	157	{
				158	x = x&0x0001ffff;
				159	if (x>SHL32(EXTEND32(1), 16))
				160	x = SUB32(SHL32(EXTEND32(1), 17),x);
				161	if (x&0x00007fff)
				162	{
				163	if (x<SHL32(EXTEND32(1), 15))
				164	{
				165	return _celt_cos_pi_2(EXTRACT16(x));
				166	} else {
Jean-Marc Valin	b66080a	2016-06-20 12:11:05 -0400	[diff] [blame^]	167	return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	168	}
				169	} else {
				170	if (x&0x0000ffff)
				171	return 0;
				172	else if (x&0x0001ffff)
				173	return -32767;
				174	else
				175	return 32767;
				176	}
				177	}
				178
				179	/** Reciprocal approximation (Q15 input, Q16 output) */
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	180	opus_val32 celt_rcp(opus_val32 x)
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	181	{
				182	int i;
Jean-Marc Valin	ff5f722	2011-07-29 18:59:12 -0400	[diff] [blame]	183	opus_val16 n;
				184	opus_val16 r;
Jean-Marc Valin	48923ae	2010-07-23 17:28:50 -0400	[diff] [blame]	185	celt_assert2(x>0, "celt_rcp() only defined for positive values");
				186	i = celt_ilog2(x);
				187	/* n is Q15 with range [0,1). */
				188	n = VSHR32(x,i-15)-32768;
				189	/* Start with a linear approximation:
				190	r = 1.8823529411764706-0.9411764705882353*n.
				191	The coefficients and the result are Q14 in the range [15420,30840].*/
				192	r = ADD16(30840, MULT16_16_Q15(-15420, n));
				193	/* Perform two Newton iterations:
				194	r -= r((rn)-1.Q15)
				195	= r((rn)+(r-1.Q15)). */
				196	r = SUB16(r, MULT16_16_Q15(r,
				197	ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
				198	/* We subtract an extra 1 in the second iteration to avoid overflow; it also
				199	neatly compensates for truncation error in the rest of the process. */
				200	r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
				201	ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
				202	/* r is now the Q15 solution to 2/(n+1), with a maximum relative error
				203	of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
				204	error of 1.24665/32768. */
				205	return VSHR32(EXTEND32(r),i-16);
				206	}
				207
				208	#endif