Blame - celt/vq.c - fp2-dev/platform/external/chromium_org/third_party/opus/src

blob: 98a0f36c9f0f2d3003c9eda5568a3871bb4918fa [file] [log] [blame]

sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	1	/* Copyright (c) 2007-2008 CSIRO
				2	Copyright (c) 2007-2009 Xiph.Org Foundation
				3	Written by Jean-Marc Valin */
				4	/*
				5	Redistribution and use in source and binary forms, with or without
				6	modification, are permitted provided that the following conditions
				7	are met:
				8
				9	- Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11
				12	- Redistributions in binary form must reproduce the above copyright
				13	notice, this list of conditions and the following disclaimer in the
				14	documentation and/or other materials provided with the distribution.
				15
				16	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
				20	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				21	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				22	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				23	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				24	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				25	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				26	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27	*/
				28
				29	#ifdef HAVE_CONFIG_H
				30	#include "config.h"
				31	#endif
				32
				33	#include "mathops.h"
				34	#include "cwrs.h"
				35	#include "vq.h"
				36	#include "arch.h"
				37	#include "os_support.h"
				38	#include "bands.h"
				39	#include "rate.h"
				40
				41	static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
				42	{
				43	int i;
				44	celt_norm *Xptr;
				45	Xptr = X;
				46	for (i=0;i<len-stride;i++)
				47	{
				48	celt_norm x1, x2;
				49	x1 = Xptr[0];
				50	x2 = Xptr[stride];
				51	Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
				52	*Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
				53	}
				54	Xptr = &X[len-2*stride-1];
				55	for (i=len-2*stride-1;i>=0;i--)
				56	{
				57	celt_norm x1, x2;
				58	x1 = Xptr[0];
				59	x2 = Xptr[stride];
				60	Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
				61	*Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
				62	}
				63	}
				64
				65	static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
				66	{
				67	static const int SPREAD_FACTOR[3]={15,10,5};
				68	int i;
				69	opus_val16 c, s;
				70	opus_val16 gain, theta;
				71	int stride2=0;
				72	int factor;
				73
				74	if (2*K>=len \|\| spread==SPREAD_NONE)
				75	return;
				76	factor = SPREAD_FACTOR[spread-1];
				77
				78	gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
				79	theta = HALF16(MULT16_16_Q15(gain,gain));
				80
				81	c = celt_cos_norm(EXTEND32(theta));
				82	s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */
				83
				84	if (len>=8*stride)
				85	{
				86	stride2 = 1;
				87	/* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
				88	It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
				89	while ((stride2stride2+stride2)stride + (stride>>2) < len)
				90	stride2++;
				91	}
				92	/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
				93	extract_collapse_mask().*/
				94	len /= stride;
				95	for (i=0;i<stride;i++)
				96	{
				97	if (dir < 0)
				98	{
				99	if (stride2)
				100	exp_rotation1(X+i*len, len, stride2, s, c);
				101	exp_rotation1(X+i*len, len, 1, c, s);
				102	} else {
				103	exp_rotation1(X+i*len, len, 1, c, -s);
				104	if (stride2)
				105	exp_rotation1(X+i*len, len, stride2, s, -c);
				106	}
				107	}
				108	}
				109
				110	/** Takes the pitch vector and the decoded residual vector, computes the gain
				111	that will give \|\|p+gy\|\|=1 and mixes the residual with the pitch. /
				112	static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
				113	int N, opus_val32 Ryy, opus_val16 gain)
				114	{
				115	int i;
				116	#ifdef FIXED_POINT
				117	int k;
				118	#endif
				119	opus_val32 t;
				120	opus_val16 g;
				121
				122	#ifdef FIXED_POINT
				123	k = celt_ilog2(Ryy)>>1;
				124	#endif
				125	t = VSHR32(Ryy, 2*(k-7));
				126	g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
				127
				128	i=0;
				129	do
				130	X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
				131	while (++i < N);
				132	}
				133
				134	static unsigned extract_collapse_mask(int *iy, int N, int B)
				135	{
				136	unsigned collapse_mask;
				137	int N0;
				138	int i;
				139	if (B<=1)
				140	return 1;
				141	/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
				142	exp_rotation().*/
				143	N0 = N/B;
				144	collapse_mask = 0;
				145	i=0; do {
				146	int j;
				147	j=0; do {
				148	collapse_mask \|= (iy[i*N0+j]!=0)<<i;
				149	} while (++j<N0);
				150	} while (++i<B);
				151	return collapse_mask;
				152	}
				153
				154	unsigned alg_quant(celt_norm X, int N, int K, int spread, int B, ec_enc enc
				155	#ifdef RESYNTH
				156	, opus_val16 gain
				157	#endif
				158	)
				159	{
				160	VARDECL(celt_norm, y);
				161	VARDECL(int, iy);
				162	VARDECL(opus_val16, signx);
				163	int i, j;
				164	opus_val16 s;
				165	int pulsesLeft;
				166	opus_val32 sum;
				167	opus_val32 xy;
				168	opus_val16 yy;
				169	unsigned collapse_mask;
				170	SAVE_STACK;
				171
				172	celt_assert2(K>0, "alg_quant() needs at least one pulse");
				173	celt_assert2(N>1, "alg_quant() needs at least two dimensions");
				174
				175	ALLOC(y, N, celt_norm);
				176	ALLOC(iy, N, int);
				177	ALLOC(signx, N, opus_val16);
				178
				179	exp_rotation(X, N, 1, B, K, spread);
				180
				181	/* Get rid of the sign */
				182	sum = 0;
				183	j=0; do {
				184	if (X[j]>0)
				185	signx[j]=1;
				186	else {
				187	signx[j]=-1;
				188	X[j]=-X[j];
				189	}
				190	iy[j] = 0;
				191	y[j] = 0;
				192	} while (++j<N);
				193
				194	xy = yy = 0;
				195
				196	pulsesLeft = K;
				197
				198	/* Do a pre-search by projecting on the pyramid */
				199	if (K > (N>>1))
				200	{
				201	opus_val16 rcp;
				202	j=0; do {
				203	sum += X[j];
				204	} while (++j<N);
				205
				206	/* If X is too small, just replace it with a pulse at 0 */
				207	#ifdef FIXED_POINT
				208	if (sum <= K)
				209	#else
				210	/* Prevents infinities and NaNs from causing too many pulses
				211	to be allocated. 64 is an approximation of infinity here. */
				212	if (!(sum > EPSILON && sum < 64))
				213	#endif
				214	{
				215	X[0] = QCONST16(1.f,14);
				216	j=1; do
				217	X[j]=0;
				218	while (++j<N);
				219	sum = QCONST16(1.f,14);
				220	}
				221	rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
				222	j=0; do {
				223	#ifdef FIXED_POINT
				224	/* It's really important to round towards zero here */
				225	iy[j] = MULT16_16_Q15(X[j],rcp);
				226	#else
				227	iy[j] = (int)floor(rcp*X[j]);
				228	#endif
				229	y[j] = (celt_norm)iy[j];
				230	yy = MAC16_16(yy, y[j],y[j]);
				231	xy = MAC16_16(xy, X[j],y[j]);
				232	y[j] *= 2;
				233	pulsesLeft -= iy[j];
				234	} while (++j<N);
				235	}
				236	celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
				237
				238	/* This should never happen, but just in case it does (e.g. on silence)
				239	we fill the first bin with pulses. */
				240	#ifdef FIXED_POINT_DEBUG
				241	celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
				242	#endif
				243	if (pulsesLeft > N+3)
				244	{
				245	opus_val16 tmp = (opus_val16)pulsesLeft;
				246	yy = MAC16_16(yy, tmp, tmp);
				247	yy = MAC16_16(yy, tmp, y[0]);
				248	iy[0] += pulsesLeft;
				249	pulsesLeft=0;
				250	}
				251
				252	s = 1;
				253	for (i=0;i<pulsesLeft;i++)
				254	{
				255	int best_id;
				256	opus_val32 best_num = -VERY_LARGE16;
				257	opus_val16 best_den = 0;
				258	#ifdef FIXED_POINT
				259	int rshift;
				260	#endif
				261	#ifdef FIXED_POINT
				262	rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
				263	#endif
				264	best_id = 0;
				265	/* The squared magnitude term gets added anyway, so we might as well
				266	add it outside the loop */
				267	yy = ADD32(yy, 1);
				268	j=0;
				269	do {
				270	opus_val16 Rxy, Ryy;
				271	/* Temporary sums of the new pulse(s) */
				272	Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
				273	/* We're multiplying y[j] by two so we don't have to do it here */
				274	Ryy = ADD16(yy, y[j]);
				275
				276	/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
				277	Rxy is positive because the sign is pre-computed) */
				278	Rxy = MULT16_16_Q15(Rxy,Rxy);
				279	/* The idea is to check for num/den >= best_num/best_den, but that way
				280	we can do it without any division */
				281	/* OPT: Make sure to use conditional moves here */
				282	if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
				283	{
				284	best_den = Ryy;
				285	best_num = Rxy;
				286	best_id = j;
				287	}
				288	} while (++j<N);
				289
				290	/* Updating the sums of the new pulse(s) */
				291	xy = ADD32(xy, EXTEND32(X[best_id]));
				292	/* We're multiplying y[j] by two so we don't have to do it here */
				293	yy = ADD16(yy, y[best_id]);
				294
				295	/* Only now that we've made the final choice, update y/iy */
				296	/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
				297	y[best_id] += 2*s;
				298	iy[best_id]++;
				299	}
				300
				301	/* Put the original sign back */
				302	j=0;
				303	do {
				304	X[j] = MULT16_16(signx[j],X[j]);
				305	if (signx[j] < 0)
				306	iy[j] = -iy[j];
				307	} while (++j<N);
				308	encode_pulses(iy, N, K, enc);
				309
				310	#ifdef RESYNTH
				311	normalise_residual(iy, X, N, yy, gain);
				312	exp_rotation(X, N, -1, B, K, spread);
				313	#endif
				314
				315	collapse_mask = extract_collapse_mask(iy, N, B);
				316	RESTORE_STACK;
				317	return collapse_mask;
				318	}
				319
				320	/** Decode pulse vector and combine the result with the pitch vector to produce
				321	the final normalised signal in the current band. */
				322	unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
				323	ec_dec *dec, opus_val16 gain)
				324	{
				325	int i;
				326	opus_val32 Ryy;
				327	unsigned collapse_mask;
				328	VARDECL(int, iy);
				329	SAVE_STACK;
				330
				331	celt_assert2(K>0, "alg_unquant() needs at least one pulse");
				332	celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
				333	ALLOC(iy, N, int);
				334	decode_pulses(iy, N, K, dec);
				335	Ryy = 0;
				336	i=0;
				337	do {
				338	Ryy = MAC16_16(Ryy, iy[i], iy[i]);
				339	} while (++i < N);
				340	normalise_residual(iy, X, N, Ryy, gain);
				341	exp_rotation(X, N, -1, B, K, spread);
				342	collapse_mask = extract_collapse_mask(iy, N, B);
				343	RESTORE_STACK;
				344	return collapse_mask;
				345	}
				346
				347	void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
				348	{
				349	int i;
				350	#ifdef FIXED_POINT
				351	int k;
				352	#endif
				353	opus_val32 E = EPSILON;
				354	opus_val16 g;
				355	opus_val32 t;
				356	celt_norm *xptr = X;
				357	for (i=0;i<N;i++)
				358	{
				359	E = MAC16_16(E, xptr, xptr);
				360	xptr++;
				361	}
				362	#ifdef FIXED_POINT
				363	k = celt_ilog2(E)>>1;
				364	#endif
				365	t = VSHR32(E, 2*(k-7));
				366	g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
				367
				368	xptr = X;
				369	for (i=0;i<N;i++)
				370	{
				371	xptr = EXTRACT16(PSHR32(MULT16_16(g, xptr), k+1));
				372	xptr++;
				373	}
				374	/return celt_sqrt(E);/
				375	}
				376
				377	int stereo_itheta(celt_norm X, celt_norm Y, int stereo, int N)
				378	{
				379	int i;
				380	int itheta;
				381	opus_val16 mid, side;
				382	opus_val32 Emid, Eside;
				383
				384	Emid = Eside = EPSILON;
				385	if (stereo)
				386	{
				387	for (i=0;i<N;i++)
				388	{
				389	celt_norm m, s;
				390	m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
				391	s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
				392	Emid = MAC16_16(Emid, m, m);
				393	Eside = MAC16_16(Eside, s, s);
				394	}
				395	} else {
				396	for (i=0;i<N;i++)
				397	{
				398	celt_norm m, s;
				399	m = X[i];
				400	s = Y[i];
				401	Emid = MAC16_16(Emid, m, m);
				402	Eside = MAC16_16(Eside, s, s);
				403	}
				404	}
				405	mid = celt_sqrt(Emid);
				406	side = celt_sqrt(Eside);
				407	#ifdef FIXED_POINT
				408	/* 0.63662 = 2/pi */
				409	itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
				410	#else
				411	itheta = (int)floor(.5f+163840.63662fatan2(side,mid));
				412	#endif
				413
				414	return itheta;
				415	}