Blame - celt/mdct.c - fp2-dev/platform/external/chromium_org/third_party/opus/src

blob: 90a214ad0e617a7d258f2aa61a07739d6ef9d7c4 [file] [log] [blame]

sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	1	/* Copyright (c) 2007-2008 CSIRO
				2	Copyright (c) 2007-2008 Xiph.Org Foundation
				3	Written by Jean-Marc Valin */
				4	/*
				5	Redistribution and use in source and binary forms, with or without
				6	modification, are permitted provided that the following conditions
				7	are met:
				8
				9	- Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11
				12	- Redistributions in binary form must reproduce the above copyright
				13	notice, this list of conditions and the following disclaimer in the
				14	documentation and/or other materials provided with the distribution.
				15
				16	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
				20	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				21	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				22	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				23	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				24	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				25	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				26	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27	*/
				28
				29	/* This is a simple MDCT implementation that uses a N/4 complex FFT
				30	to do most of the work. It should be relatively straightforward to
				31	plug in pretty much any FFT here.
				32
				33	This replaces the Vorbis FFT (and uses the exact same API), which
				34	was a bit too messy and that was ending up duplicating code
				35	(might as well use the same FFT everywhere).
				36
				37	The algorithm is similar to (and inspired from) Fabrice Bellard's
				38	MDCT implementation in FFMPEG, but has differences in signs, ordering
				39	and scaling in many places.
				40	*/
				41
				42	#ifndef SKIP_CONFIG_H
				43	#ifdef HAVE_CONFIG_H
				44	#include "config.h"
				45	#endif
				46	#endif
				47
				48	#include "mdct.h"
				49	#include "kiss_fft.h"
				50	#include "_kiss_fft_guts.h"
				51	#include <math.h>
				52	#include "os_support.h"
				53	#include "mathops.h"
				54	#include "stack_alloc.h"
				55
				56	#ifdef CUSTOM_MODES
				57
				58	int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
				59	{
				60	int i;
				61	int N4;
				62	kiss_twiddle_scalar *trig;
				63	#if defined(FIXED_POINT)
				64	int N2=N>>1;
				65	#endif
				66	l->n = N;
				67	N4 = N>>2;
				68	l->maxshift = maxshift;
				69	for (i=0;i<=maxshift;i++)
				70	{
				71	if (i==0)
				72	l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
				73	else
				74	l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
				75	#ifndef ENABLE_TI_DSPLIB55
				76	if (l->kfft[i]==NULL)
				77	return 0;
				78	#endif
				79	}
				80	l->trig = trig = (kiss_twiddle_scalar)opus_alloc((N4+1)sizeof(kiss_twiddle_scalar));
				81	if (l->trig==NULL)
				82	return 0;
				83	/* We have enough points that sine isn't necessary */
				84	#if defined(FIXED_POINT)
				85	for (i=0;i<=N4;i++)
				86	trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
				87	#else
				88	for (i=0;i<=N4;i++)
				89	trig[i] = (kiss_twiddle_scalar)cos(2PIi/N);
				90	#endif
				91	return 1;
				92	}
				93
				94	void clt_mdct_clear(mdct_lookup *l)
				95	{
				96	int i;
				97	for (i=0;i<=l->maxshift;i++)
				98	opus_fft_free(l->kfft[i]);
				99	opus_free((kiss_twiddle_scalar*)l->trig);
				100	}
				101
				102	#endif /* CUSTOM_MODES */
				103
				104	/* Forward MDCT trashes the input array */
				105	void clt_mdct_forward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
				106	const opus_val16 *window, int overlap, int shift, int stride)
				107	{
				108	int i;
				109	int N, N2, N4;
				110	kiss_twiddle_scalar sine;
				111	VARDECL(kiss_fft_scalar, f);
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	112	VARDECL(kiss_fft_scalar, f2);
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	113	SAVE_STACK;
				114	N = l->n;
				115	N >>= shift;
				116	N2 = N>>1;
				117	N4 = N>>2;
				118	ALLOC(f, N2, kiss_fft_scalar);
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	119	ALLOC(f2, N2, kiss_fft_scalar);
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	120	/* sin(x) ~= x here */
				121	#ifdef FIXED_POINT
				122	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
				123	#else
				124	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
				125	#endif
				126
				127	/* Consider the input to be composed of four blocks: [a, b, c, d] */
				128	/* Window, shuffle, fold */
				129	{
				130	/* Temp pointers to make it really clear to the compiler what we're doing */
				131	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
				132	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
				133	kiss_fft_scalar * OPUS_RESTRICT yp = f;
				134	const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
				135	const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	136	for(i=0;i<((overlap+3)>>2);i++)
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	137	{
				138	/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
				139	yp++ = MULT16_32_Q15(wp2, xp1[N2]) + MULT16_32_Q15(wp1,xp2);
				140	yp++ = MULT16_32_Q15(wp1, xp1) - MULT16_32_Q15(wp2, xp2[-N2]);
				141	xp1+=2;
				142	xp2-=2;
				143	wp1+=2;
				144	wp2-=2;
				145	}
				146	wp1 = window;
				147	wp2 = window+overlap-1;
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	148	for(;i<N4-((overlap+3)>>2);i++)
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	149	{
				150	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
				151	yp++ = xp2;
				152	yp++ = xp1;
				153	xp1+=2;
				154	xp2-=2;
				155	}
				156	for(;i<N4;i++)
				157	{
				158	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
				159	yp++ = -MULT16_32_Q15(wp1, xp1[-N2]) + MULT16_32_Q15(wp2, xp2);
				160	yp++ = MULT16_32_Q15(wp2, xp1) + MULT16_32_Q15(wp1, xp2[N2]);
				161	xp1+=2;
				162	xp2-=2;
				163	wp1+=2;
				164	wp2-=2;
				165	}
				166	}
				167	/* Pre-rotation */
				168	{
				169	kiss_fft_scalar * OPUS_RESTRICT yp = f;
				170	const kiss_twiddle_scalar *t = &l->trig[0];
				171	for(i=0;i<N4;i++)
				172	{
				173	kiss_fft_scalar re, im, yr, yi;
				174	re = yp[0];
				175	im = yp[1];
				176	yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
				177	yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
				178	/* works because the cos is nearly one */
				179	*yp++ = yr + S_MUL(yi,sine);
				180	*yp++ = yi - S_MUL(yr,sine);
				181	}
				182	}
				183
				184	/* N/4 complex FFT, down-scales by 4/N */
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	185	opus_fft(l->kfft[shift], (kiss_fft_cpx )f, (kiss_fft_cpx )f2);
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	186
				187	/* Post-rotate */
				188	{
				189	/* Temp pointers to make it really clear to the compiler what we're doing */
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	190	const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	191	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
				192	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
				193	const kiss_twiddle_scalar *t = &l->trig[0];
				194	/* Temp pointers to make it really clear to the compiler what we're doing */
				195	for(i=0;i<N4;i++)
				196	{
				197	kiss_fft_scalar yr, yi;
				198	yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
				199	yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
				200	/* works because the cos is nearly one */
				201	*yp1 = yr - S_MUL(yi,sine);
				202	*yp2 = yi + S_MUL(yr,sine);;
				203	fp += 2;
				204	yp1 += 2*stride;
				205	yp2 -= 2*stride;
				206	}
				207	}
				208	RESTORE_STACK;
				209	}
				210
				211	void clt_mdct_backward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
				212	const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
				213	{
				214	int i;
				215	int N, N2, N4;
				216	kiss_twiddle_scalar sine;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	217	VARDECL(kiss_fft_scalar, f2);
				218	SAVE_STACK;
				219	N = l->n;
				220	N >>= shift;
				221	N2 = N>>1;
				222	N4 = N>>2;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	223	ALLOC(f2, N2, kiss_fft_scalar);
				224	/* sin(x) ~= x here */
				225	#ifdef FIXED_POINT
				226	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
				227	#else
				228	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
				229	#endif
				230
				231	/* Pre-rotate */
				232	{
				233	/* Temp pointers to make it really clear to the compiler what we're doing */
				234	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
				235	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
				236	kiss_fft_scalar * OPUS_RESTRICT yp = f2;
				237	const kiss_twiddle_scalar *t = &l->trig[0];
				238	for(i=0;i<N4;i++)
				239	{
				240	kiss_fft_scalar yr, yi;
				241	yr = -S_MUL(xp2, t[i<<shift]) + S_MUL(xp1,t[(N4-i)<<shift]);
				242	yi = -S_MUL(xp2, t[(N4-i)<<shift]) - S_MUL(xp1,t[i<<shift]);
				243	/* works because the cos is nearly one */
				244	*yp++ = yr - S_MUL(yi,sine);
				245	*yp++ = yi + S_MUL(yr,sine);
				246	xp1+=2*stride;
				247	xp2-=2*stride;
				248	}
				249	}
				250
				251	/* Inverse N/4 complex FFT. This one should not downscale even in fixed-point */
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	252	opus_ifft(l->kfft[shift], (kiss_fft_cpx )f2, (kiss_fft_cpx )(out+(overlap>>1)));
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	253
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	254	/* Post-rotate and de-shuffle from both ends of the buffer at once to make
				255	it in-place. */
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	256	{
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	257	kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
				258	kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	259	const kiss_twiddle_scalar *t = &l->trig[0];
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	260	/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
				261	middle pair will be computed twice. */
				262	for(i=0;i<(N4+1)>>1;i++)
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	263	{
				264	kiss_fft_scalar re, im, yr, yi;
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	265	kiss_twiddle_scalar t0, t1;
				266	re = yp0[0];
				267	im = yp0[1];
				268	t0 = t[i<<shift];
				269	t1 = t[(N4-i)<<shift];
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	270	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	271	yr = S_MUL(re,t0) - S_MUL(im,t1);
				272	yi = S_MUL(im,t0) + S_MUL(re,t1);
				273	re = yp1[0];
				274	im = yp1[1];
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	275	/* works because the cos is nearly one */
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	276	yp0[0] = -(yr - S_MUL(yi,sine));
				277	yp1[1] = yi + S_MUL(yr,sine);
				278
				279	t0 = t[(N4-i-1)<<shift];
				280	t1 = t[(i+1)<<shift];
				281	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
				282	yr = S_MUL(re,t0) - S_MUL(im,t1);
				283	yi = S_MUL(im,t0) + S_MUL(re,t1);
				284	/* works because the cos is nearly one */
				285	yp1[0] = -(yr - S_MUL(yi,sine));
				286	yp0[1] = yi + S_MUL(yr,sine);
				287	yp0 += 2;
				288	yp1 -= 2;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	289	}
				290	}
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	291
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	292	/* Mirror on both sides for TDAC */
				293	{
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	294	kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
				295	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	296	const opus_val16 * OPUS_RESTRICT wp1 = window;
				297	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	298
				299	for(i = 0; i < overlap/2; i++)
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	300	{
tlegrand@chromium.org	e3ea049	2013-10-23 09:13:50 +0000	[diff] [blame]	301	kiss_fft_scalar x1, x2;
				302	x1 = *xp1;
				303	x2 = *yp1;
				304	yp1++ = MULT16_32_Q15(wp2, x2) - MULT16_32_Q15(*wp1, x1);
				305	xp1-- = MULT16_32_Q15(wp1, x2) + MULT16_32_Q15(*wp2, x1);
sergeyu@chromium.org	885f2ff	2012-10-17 22:31:52 +0000	[diff] [blame]	306	wp1++;
				307	wp2--;
				308	}
				309	}
				310	RESTORE_STACK;
				311	}