Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 1 | /* Copyright (c) 2007-2008 CSIRO |
| 2 | Copyright (c) 2007-2009 Xiph.Org Foundation |
| 3 | Written by Jean-Marc Valin */ |
| 4 | /* |
| 5 | Redistribution and use in source and binary forms, with or without |
| 6 | modification, are permitted provided that the following conditions |
| 7 | are met: |
| 8 | |
| 9 | - Redistributions of source code must retain the above copyright |
| 10 | notice, this list of conditions and the following disclaimer. |
| 11 | |
| 12 | - Redistributions in binary form must reproduce the above copyright |
| 13 | notice, this list of conditions and the following disclaimer in the |
| 14 | documentation and/or other materials provided with the distribution. |
| 15 | |
| 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 20 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #ifdef HAVE_CONFIG_H |
| 30 | #include "config.h" |
| 31 | #endif |
| 32 | |
| 33 | #include "mathops.h" |
| 34 | #include "cwrs.h" |
| 35 | #include "vq.h" |
| 36 | #include "arch.h" |
| 37 | #include "os_support.h" |
| 38 | #include "bands.h" |
| 39 | #include "rate.h" |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 40 | #include "pitch.h" |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 41 | |
#ifndef OVERRIDE_vq_exp_rotation1
/** In-place rotation of a vector by a sequence of 2-D Givens "butterflies".
    Each step rotates the pair (Xptr[0], Xptr[stride]) by the angle whose
    cosine/sine are (c, s) in Q15.  A forward pass over the vector is followed
    by a backward pass, spreading the rotation across all coefficients.
    @param X      vector to rotate (modified in place)
    @param len    number of elements in X
    @param stride distance between the two elements of each rotated pair
    @param c      Q15 cosine of the rotation angle
    @param s      Q15 sine of the rotation angle */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   /* Negated sine, used for the inverse-direction element of each pair. */
   ms = NEG16(s);
   /* Forward pass: rotate (x1, x2) pairs walking up the vector. */
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      /* [x2'; x1'] = [c s; -s c] * [x2; x1], with Q15 rounding shifts. */
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Backward pass: same butterfly applied walking down the vector. */
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
}
#endif /* OVERRIDE_vq_exp_rotation1 */
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 69 | |
/** Applies (dir=1) or undoes (dir=-1) a spreading rotation to X, used to
    avoid sparse-pulse artifacts after PVQ quantisation.  The rotation angle
    shrinks as the pulse count K grows relative to the length, and is skipped
    entirely when there are enough pulses (2*K>=len) or spreading is disabled.
    @param X      vector to rotate in place (len elements)
    @param len    total length of X
    @param dir    +1 to apply the rotation, -1 to apply the inverse
    @param stride number of interleaved sub-vectors (B blocks)
    @param K      number of pulses
    @param spread spreading amount (SPREAD_NONE..SPREAD_AGGRESSIVE) */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   /* Spreading factors indexed by (spread-1); larger factor => smaller angle. */
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   /* gain = len/(len+factor*K) in Q15; theta = gain^2/2. */
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   /* Rotate each of the 'stride' interleaved blocks independently.  For the
      inverse direction the two rotation stages are applied in reverse order
      with negated sine terms, exactly undoing the forward case. */
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}
| 114 | |
/** Takes the pitch vector and the decoded residual vector, computes the gain
    that will give ||p+g*y||=1 and mixes the residual with the pitch.
    @param iy   integer pulse vector (N entries)
    @param X    output: normalised signal, scaled copy of iy (N entries)
    @param N    number of samples in the band
    @param Ryy  energy of the pulse vector, sum(iy[i]^2)
    @param gain extra gain to fold into the normalisation (Q15) */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val16 g;

#ifdef FIXED_POINT
   /* Normalisation shift so that t lands in the range celt_rsqrt_norm expects. */
   k = celt_ilog2(Ryy)>>1;
#endif
   /* NOTE(review): `k` is only declared/assigned under FIXED_POINT but appears
      textually below; presumably in float builds VSHR32/PSHR32/EXTRACT16 are
      pass-through macros that discard the shift argument, so `k` never
      survives preprocessing — confirm against arch.h. */
   t = VSHR32(Ryy, 2*(k-7));
   /* g = gain / sqrt(Ryy), i.e. the factor giving the pulse vector unit norm. */
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   i=0;
   do
      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
   while (++i < N);
}
| 138 | |
/** Builds a per-block "collapse" bitmask from the pulse vector: bit b of the
    result is set iff interleaved block b of iy contains at least one nonzero
    pulse.  With a single block (B<=1) every band trivially has energy, so the
    mask is just 1.
    @param iy pulse vector (N entries, B interleaved blocks)
    @param N  total number of entries
    @param B  number of blocks
    @return   B-bit mask of blocks that received pulses */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned mask;
   int blk_len;
   int blk;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   blk_len = celt_udiv(N, B);
   mask = 0;
   blk=0; do {
      int k;
      unsigned any=0;
      /* OR together all pulses of this block; nonzero means "not collapsed". */
      k=0; do {
         any |= iy[blk*blk_len+k];
      } while (++k<blk_len);
      if (any)
         mask |= 1u<<blk;
   } while (++blk<B);
   return mask;
}
| 160 | |
/** Algebraic pulse-vector quantisation (PVQ search + encoding).  Finds the
    integer vector iy with sum(|iy|)==K that best approximates the direction
    of X, encodes it into the range coder, and returns the collapse mask of
    blocks that received pulses.  X is sign-folded and rotated in place; under
    RESYNTH it is additionally overwritten with the decoded (synthesised)
    unit-norm signal.
    @param X      residual to quantise (N entries, modified in place)
    @param N      number of samples in the band (must be > 1)
    @param K      number of pulses to allocate (must be > 0)
    @param spread spreading amount passed to exp_rotation()
    @param B      number of interleaved blocks
    @param enc    range encoder the pulse vector is written to
    @return       collapse mask (see extract_collapse_mask()) */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
   , opus_val16 gain
#endif
   )
{
   VARDECL(celt_norm, y);
   VARDECL(int, iy);
   VARDECL(opus_val16, signx);
   int i, j;
   opus_val16 s;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;      /* running correlation <X, y> */
   opus_val16 yy;      /* running energy <y, y> (tracked with y[] doubled) */
   unsigned collapse_mask;
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   ALLOC(y, N, celt_norm);
   ALLOC(iy, N, int);
   ALLOC(signx, N, opus_val16);

   /* Spread the signal so pulses don't end up concentrated in a few bins. */
   exp_rotation(X, N, 1, B, K, spread);

   /* Get rid of the sign: fold X into the positive orthant, remembering the
      signs so they can be restored (and applied to iy) at the end. */
   sum = 0;
   j=0; do {
      if (X[j]>0)
         signx[j]=1;
      else {
         signx[j]=-1;
         X[j]=-X[j];
      }
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid: allocate roughly K-1
      pulses in one pass by scaling X to L1 norm K-1, leaving the remainder
      for the exact greedy search below. */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
      /* rcp = (K-1)/sum: scale factor projecting X onto the L1 ball. */
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y[] is kept doubled so the per-candidate energy update in the
            greedy loop is a single addition. */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy search: place the remaining pulses one at a time, each in the
      position that maximises the (squared) normalised correlation. */
   s = 1;
   for (i=0;i<pulsesLeft;i++)
   {
      int best_id;
      opus_val32 best_num = -VERY_LARGE16;
      opus_val16 best_den = 0;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      /* Headroom shift for Rxy, growing with the number of pulses placed. */
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);
      j=0;
      do {
         opus_val16 Rxy, Ryy;
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2*s;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
   encode_pulses(iy, N, K, enc);

#ifdef RESYNTH
   /* Encoder-side resynthesis: reproduce exactly what the decoder will see. */
   normalise_residual(iy, X, N, yy, gain);
   exp_rotation(X, N, -1, B, K, spread);
#endif

   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
| 326 | |
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.
    Mirrors alg_quant(): reads the pulse vector from the range decoder,
    normalises it to unit energy (times gain), and applies the inverse
    spreading rotation.
    @param X      output: decoded normalised signal (N entries)
    @param N      number of samples in the band (must be > 1)
    @param K      number of pulses (must be > 0)
    @param spread spreading amount passed to exp_rotation()
    @param B      number of interleaved blocks
    @param dec    range decoder to read the pulse vector from
    @param gain   gain to fold into the normalisation (Q15)
    @return       collapse mask (see extract_collapse_mask()) */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   /* decode_pulses() returns the energy of the decoded pulse vector. */
   Ryy = decode_pulses(iy, N, K, dec);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
| 347 | |
#ifndef OVERRIDE_renormalise_vector
/** Rescales X in place so that its energy equals gain^2, i.e.
    X <- X * gain / sqrt(<X,X> + EPSILON).  EPSILON keeps the all-zero
    vector from dividing by zero.
    @param X    vector to renormalise in place (N entries)
    @param N    number of entries
    @param gain target norm (Q15)
    @param arch architecture id forwarded to the inner-product kernel */
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
   /* Normalisation shift so that t lands in the range celt_rsqrt_norm expects. */
   k = celt_ilog2(E)>>1;
#endif
   /* NOTE(review): `k` is only defined under FIXED_POINT; presumably
      VSHR32/PSHR32/EXTRACT16 discard the shift argument in float builds so
      `k` never survives preprocessing — confirm against arch.h. */
   t = VSHR32(E, 2*(k-7));
   /* g = gain / sqrt(E). */
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
      xptr++;
   }
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 375 | |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 376 | int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 377 | { |
| 378 | int i; |
| 379 | int itheta; |
| 380 | opus_val16 mid, side; |
| 381 | opus_val32 Emid, Eside; |
| 382 | |
| 383 | Emid = Eside = EPSILON; |
| 384 | if (stereo) |
| 385 | { |
| 386 | for (i=0;i<N;i++) |
| 387 | { |
| 388 | celt_norm m, s; |
| 389 | m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 390 | s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 391 | Emid = MAC16_16(Emid, m, m); |
| 392 | Eside = MAC16_16(Eside, s, s); |
| 393 | } |
| 394 | } else { |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 395 | Emid += celt_inner_prod(X, X, N, arch); |
| 396 | Eside += celt_inner_prod(Y, Y, N, arch); |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 397 | } |
| 398 | mid = celt_sqrt(Emid); |
| 399 | side = celt_sqrt(Eside); |
| 400 | #ifdef FIXED_POINT |
| 401 | /* 0.63662 = 2/pi */ |
| 402 | itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); |
| 403 | #else |
| 404 | itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); |
| 405 | #endif |
| 406 | |
| 407 | return itheta; |
| 408 | } |