Blame - src/analysis.c - platform/external/libopus

blob: 360ebcc8ddbeb7255693f92f619a4e71d837ed52 [file] [log] [blame]

Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	1	/* Copyright (c) 2011 Xiph.Org Foundation
				2	Written by Jean-Marc Valin */
				3	/*
				4	Redistribution and use in source and binary forms, with or without
				5	modification, are permitted provided that the following conditions
				6	are met:
				7
				8	- Redistributions of source code must retain the above copyright
				9	notice, this list of conditions and the following disclaimer.
				10
				11	- Redistributions in binary form must reproduce the above copyright
				12	notice, this list of conditions and the following disclaimer in the
				13	documentation and/or other materials provided with the distribution.
				14
				15	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				16	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				17	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				18	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
				19	CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				20	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				21	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				22	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				23	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				24	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				25	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				26	*/
				27
				28	#ifdef HAVE_CONFIG_H
				29	#include "config.h"
				30	#endif
				31
				32	#include "kiss_fft.h"
				33	#include "celt.h"
				34	#include "modes.h"
				35	#include "arch.h"
				36	#include "quant_bands.h"
				37	#include <stdio.h>
				38	#include "analysis.h"
				39	#include "mlp.h"
				40	#include "stack_alloc.h"
				41
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	42	#ifndef M_PI
				43	#define M_PI 3.141592653
				44	#endif
				45
				46	static const float dct_table[128] = {
				47	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				48	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				49	0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
				50	-0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
				51	0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
				52	-0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
				53	0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
				54	0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
				55	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				56	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				57	0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
				58	-0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
				59	0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
				60	-0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
				61	0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
				62	0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
				63	};
				64
				65	static const float analysis_window[240] = {
				66	0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
				67	0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
				68	0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
				69	0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
				70	0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
				71	0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
				72	0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
				73	0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
				74	0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
				75	0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
				76	0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
				77	0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
				78	0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
				79	0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
				80	0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
				81	0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
				82	0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
				83	0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
				84	0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
				85	0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
				86	0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
				87	0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
				88	0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
				89	0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
				90	0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
				91	0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
				92	0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
				93	0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
				94	0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
				95	0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
				96	};
				97
				98	static const int tbands[NB_TBANDS+1] = {
				99	2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
				100	};
				101
				102	static const int extra_bands[NB_TOT_BANDS+1] = {
				103	1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
				104	};
				105
				106	/*static const float tweight[NB_TBANDS+1] = {
				107	.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
				108	};*/
				109
				110	#define NB_TONAL_SKIP_BANDS 9
				111
				112	#define cA 0.43157974f
				113	#define cB 0.67848403f
				114	#define cC 0.08595542f
				115	#define cE ((float)M_PI/2)
				116	static OPUS_INLINE float fast_atan2f(float y, float x) {
				117	float x2, y2;
				118	/* Should avoid underflow on the values we'll get */
				119	if (ABS16(x)+ABS16(y)<1e-9f)
				120	{
				121	x*=1e12f;
				122	y*=1e12f;
				123	}
				124	x2 = x*x;
				125	y2 = y*y;
				126	if(x2<y2){
				127	float den = (y2 + cBx2) (y2 + cC*x2);
				128	if (den!=0)
				129	return -xy(y2 + cA*x2) / den + (y<0 ? -cE : cE);
				130	else
				131	return (y<0 ? -cE : cE);
				132	}else{
				133	float den = (x2 + cBy2) (x2 + cC*y2);
				134	if (den!=0)
				135	return xy(x2 + cAy2) / den + (y<0 ? -cE : cE) - (xy<0 ? -cE : cE);
				136	else
				137	return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
				138	}
				139	}
				140
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	141	void tonality_analysis_init(TonalityAnalysisState *tonal)
				142	{
				143	/* Initialize reusable fields. */
				144	tonal->arch = opus_select_arch();
				145	/* Clear remaining fields. */
				146	tonality_analysis_reset(tonal);
				147	}
				148
				149	void tonality_analysis_reset(TonalityAnalysisState *tonal)
				150	{
				151	/* Clear non-reusable fields. */
				152	char start = (char)&tonal->TONALITY_ANALYSIS_RESET_START;
				153	OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
				154	}
				155
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	156	void tonality_get_info(TonalityAnalysisState tonal, AnalysisInfo info_out, int len)
				157	{
				158	int pos;
				159	int curr_lookahead;
				160	float psum;
				161	int i;
				162
				163	pos = tonal->read_pos;
				164	curr_lookahead = tonal->write_pos-tonal->read_pos;
				165	if (curr_lookahead<0)
				166	curr_lookahead += DETECT_SIZE;
				167
				168	if (len > 480 && pos != tonal->write_pos)
				169	{
				170	pos++;
				171	if (pos==DETECT_SIZE)
				172	pos=0;
				173	}
				174	if (pos == tonal->write_pos)
				175	pos--;
				176	if (pos<0)
				177	pos = DETECT_SIZE-1;
				178	OPUS_COPY(info_out, &tonal->info[pos], 1);
				179	tonal->read_subframe += len/120;
				180	while (tonal->read_subframe>=4)
				181	{
				182	tonal->read_subframe -= 4;
				183	tonal->read_pos++;
				184	}
				185	if (tonal->read_pos>=DETECT_SIZE)
				186	tonal->read_pos-=DETECT_SIZE;
				187
				188	/* Compensate for the delay in the features themselves.
				189	FIXME: Need a better estimate the 10 I just made up */
				190	curr_lookahead = IMAX(curr_lookahead-10, 0);
				191
				192	psum=0;
				193	/* Summing the probability of transition patterns that involve music at
				194	time (DETECT_SIZE-curr_lookahead-1) */
				195	for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
				196	psum += tonal->pmusic[i];
				197	for (;i<DETECT_SIZE;i++)
				198	psum += tonal->pspeech[i];
				199	psum = psumtonal->music_confidence + (1-psum)tonal->speech_confidence;
				200	/printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);/
				201
				202	info_out->music_prob = psum;
				203	}
				204
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	205	static void tonality_analysis(TonalityAnalysisState tonal, const CELTMode celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	206	{
				207	int i, b;
				208	const kiss_fft_state *kfft;
				209	VARDECL(kiss_fft_cpx, in);
				210	VARDECL(kiss_fft_cpx, out);
				211	int N = 480, N2=240;
				212	float * OPUS_RESTRICT A = tonal->angle;
				213	float * OPUS_RESTRICT dA = tonal->d_angle;
				214	float * OPUS_RESTRICT d2A = tonal->d2_angle;
				215	VARDECL(float, tonality);
				216	VARDECL(float, noisiness);
				217	float band_tonality[NB_TBANDS];
				218	float logE[NB_TBANDS];
				219	float BFCC[8];
				220	float features[25];
				221	float frame_tonality;
				222	float max_frame_tonality;
				223	/float tw_sum=0;/
				224	float frame_noisiness;
				225	const float pi4 = (float)(M_PIM_PIM_PI*M_PI);
				226	float slope=0;
				227	float frame_stationarity;
				228	float relativeE;
				229	float frame_probs[2];
				230	float alpha, alphaE, alphaE2;
				231	float frame_loudness;
				232	float bandwidth_mask;
				233	int bandwidth=0;
				234	float maxE = 0;
				235	float noise_floor;
				236	int remaining;
				237	AnalysisInfo *info;
				238	SAVE_STACK;
				239
				240	tonal->last_transition++;
				241	alpha = 1.f/IMIN(20, 1+tonal->count);
				242	alphaE = 1.f/IMIN(50, 1+tonal->count);
				243	alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
				244
				245	if (tonal->count<4)
				246	tonal->music_prob = .5;
				247	kfft = celt_mode->mdct.kfft[0];
				248	if (tonal->count==0)
				249	tonal->mem_fill = 240;
				250	downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
				251	if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
				252	{
				253	tonal->mem_fill += len;
				254	/* Don't have enough to update the analysis */
				255	RESTORE_STACK;
				256	return;
				257	}
				258	info = &tonal->info[tonal->write_pos++];
				259	if (tonal->write_pos>=DETECT_SIZE)
				260	tonal->write_pos-=DETECT_SIZE;
				261
				262	ALLOC(in, 480, kiss_fft_cpx);
				263	ALLOC(out, 480, kiss_fft_cpx);
				264	ALLOC(tonality, 240, float);
				265	ALLOC(noisiness, 240, float);
				266	for (i=0;i<N2;i++)
				267	{
				268	float w = analysis_window[i];
				269	in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
				270	in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
				271	in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
				272	in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
				273	}
				274	OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
				275	remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
				276	downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
				277	tonal->mem_fill = 240 + remaining;
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	278	opus_fft(kfft, in, out, tonal->arch);
				279	#ifndef FIXED_POINT
				280	/* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
				281	if (celt_isnan(out[0].r))
				282	{
				283	info->valid = 0;
				284	RESTORE_STACK;
				285	return;
				286	}
				287	#endif
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	288
				289	for (i=1;i<N2;i++)
				290	{
				291	float X1r, X2r, X1i, X2i;
				292	float angle, d_angle, d2_angle;
				293	float angle2, d_angle2, d2_angle2;
				294	float mod1, mod2, avg_mod;
				295	X1r = (float)out[i].r+out[N-i].r;
				296	X1i = (float)out[i].i-out[N-i].i;
				297	X2r = (float)out[i].i+out[N-i].i;
				298	X2i = (float)out[N-i].r-out[i].r;
				299
				300	angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
				301	d_angle = angle - A[i];
				302	d2_angle = d_angle - dA[i];
				303
				304	angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
				305	d_angle2 = angle2 - angle;
				306	d2_angle2 = d_angle2 - d_angle;
				307
				308	mod1 = d2_angle - (float)floor(.5+d2_angle);
				309	noisiness[i] = ABS16(mod1);
				310	mod1 *= mod1;
				311	mod1 *= mod1;
				312
				313	mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
				314	noisiness[i] += ABS16(mod2);
				315	mod2 *= mod2;
				316	mod2 *= mod2;
				317
				318	avg_mod = .25f(d2A[i]+2.fmod1+mod2);
				319	tonality[i] = 1.f/(1.f+40.f16.fpi4*avg_mod)-.015f;
				320
				321	A[i] = angle2;
				322	dA[i] = d_angle2;
				323	d2A[i] = mod2;
				324	}
				325
				326	frame_tonality = 0;
				327	max_frame_tonality = 0;
				328	/tw_sum = 0;/
				329	info->activity = 0;
				330	frame_noisiness = 0;
				331	frame_stationarity = 0;
				332	if (!tonal->count)
				333	{
				334	for (b=0;b<NB_TBANDS;b++)
				335	{
				336	tonal->lowE[b] = 1e10;
				337	tonal->highE[b] = -1e10;
				338	}
				339	}
				340	relativeE = 0;
				341	frame_loudness = 0;
				342	for (b=0;b<NB_TBANDS;b++)
				343	{
				344	float E=0, tE=0, nE=0;
				345	float L1, L2;
				346	float stationarity;
				347	for (i=tbands[b];i<tbands[b+1];i++)
				348	{
				349	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				350	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				351	#ifdef FIXED_POINT
				352	/* FIXME: It's probably best to change the BFCC filter initial state instead */
				353	binE *= 5.55e-17f;
				354	#endif
				355	E += binE;
				356	tE += binE*tonality[i];
				357	nE += binE2.f(.5f-noisiness[i]);
				358	}
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	359	#ifndef FIXED_POINT
				360	/* Check for extreme band energies that could cause NaNs later. */
				361	if (!(E<1e9f) \|\| celt_isnan(E))
				362	{
				363	info->valid = 0;
				364	RESTORE_STACK;
				365	return;
				366	}
				367	#endif
				368
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	369	tonal->E[tonal->E_count][b] = E;
				370	frame_noisiness += nE/(1e-15f+E);
				371
				372	frame_loudness += (float)sqrt(E+1e-10f);
				373	logE[b] = (float)log(E+1e-10f);
				374	tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
				375	tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
				376	if (tonal->highE[b] < tonal->lowE[b]+1.f)
				377	{
				378	tonal->highE[b]+=.5f;
				379	tonal->lowE[b]-=.5f;
				380	}
				381	relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
				382
				383	L1=L2=0;
				384	for (i=0;i<NB_FRAMES;i++)
				385	{
				386	L1 += (float)sqrt(tonal->E[i][b]);
				387	L2 += tonal->E[i][b];
				388	}
				389
				390	stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
				391	stationarity *= stationarity;
				392	stationarity *= stationarity;
				393	frame_stationarity += stationarity;
				394	/band_tonality[b] = tE/(1e-15+E)/;
				395	band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
				396	#if 0
				397	if (b>=NB_TONAL_SKIP_BANDS)
				398	{
				399	frame_tonality += tweight[b]*band_tonality[b];
				400	tw_sum += tweight[b];
				401	}
				402	#else
				403	frame_tonality += band_tonality[b];
				404	if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
				405	frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
				406	#endif
				407	max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f(b-NB_TBANDS))frame_tonality);
				408	slope += band_tonality[b]*(b-8);
				409	/printf("%f %f ", band_tonality[b], stationarity);/
				410	tonal->prev_band_tonality[b] = band_tonality[b];
				411	}
				412
				413	bandwidth_mask = 0;
				414	bandwidth = 0;
				415	maxE = 0;
				416	noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
				417	#ifdef FIXED_POINT
				418	noise_floor *= 1<<(15+SIG_SHIFT);
				419	#endif
				420	noise_floor *= noise_floor;
				421	for (b=0;b<NB_TOT_BANDS;b++)
				422	{
				423	float E=0;
				424	int band_start, band_end;
				425	/* Keep a margin of 300 Hz for aliasing */
				426	band_start = extra_bands[b];
				427	band_end = extra_bands[b+1];
				428	for (i=band_start;i<band_end;i++)
				429	{
				430	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				431	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				432	E += binE;
				433	}
				434	maxE = MAX32(maxE, E);
				435	tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
				436	E = MAX32(E, tonal->meanE[b]);
				437	/* Use a simple follower with 13 dB/Bark slope for spreading function */
				438	bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
				439	/* Consider the band "active" only if all these conditions are met:
				440	1) less than 10 dB below the simple follower
				441	2) less than 90 dB below the peak band (maximal masking possible considering
				442	both the ATH and the loudness-dependent slope of the spreading function)
				443	3) above the PCM quantization noise floor
				444	*/
				445	if (E>.1bandwidth_mask && E1e9f > maxE && E > noise_floor*(band_end-band_start))
				446	bandwidth = b;
				447	}
				448	if (tonal->count<=2)
				449	bandwidth = 20;
				450	frame_loudness = 20*(float)log10(frame_loudness);
				451	tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
				452	tonal->lowECount *= (1-alphaE);
				453	if (frame_loudness < tonal->Etracker-30)
				454	tonal->lowECount += alphaE;
				455
				456	for (i=0;i<8;i++)
				457	{
				458	float sum=0;
				459	for (b=0;b<16;b++)
				460	sum += dct_table[i16+b]logE[b];
				461	BFCC[i] = sum;
				462	}
				463
				464	frame_stationarity /= NB_TBANDS;
				465	relativeE /= NB_TBANDS;
				466	if (tonal->count<10)
				467	relativeE = .5;
				468	frame_noisiness /= NB_TBANDS;
				469	#if 1
				470	info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
				471	#else
				472	info->activity = .5*(1+frame_noisiness-frame_stationarity);
				473	#endif
				474	frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
				475	frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
				476	tonal->prev_tonality = frame_tonality;
				477
				478	slope /= 8*8;
				479	info->tonality_slope = slope;
				480
				481	tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
				482	tonal->count++;
				483	info->tonality = frame_tonality;
				484
				485	for (i=0;i<4;i++)
				486	features[i] = -0.12299f(BFCC[i]+tonal->mem[i+24]) + 0.49195f(tonal->mem[i]+tonal->mem[i+16]) + 0.69693ftonal->mem[i+8] - 1.4349ftonal->cmean[i];
				487
				488	for (i=0;i<4;i++)
				489	tonal->cmean[i] = (1-alpha)tonal->cmean[i] + alphaBFCC[i];
				490
				491	for (i=0;i<4;i++)
				492	features[4+i] = 0.63246f(BFCC[i]-tonal->mem[i+24]) + 0.31623f(tonal->mem[i]-tonal->mem[i+16]);
				493	for (i=0;i<3;i++)
				494	features[8+i] = 0.53452f(BFCC[i]+tonal->mem[i+24]) - 0.26726f(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
				495
				496	if (tonal->count > 5)
				497	{
				498	for (i=0;i<9;i++)
				499	tonal->std[i] = (1-alpha)tonal->std[i] + alphafeatures[i]*features[i];
				500	}
				501
				502	for (i=0;i<8;i++)
				503	{
				504	tonal->mem[i+24] = tonal->mem[i+16];
				505	tonal->mem[i+16] = tonal->mem[i+8];
				506	tonal->mem[i+8] = tonal->mem[i];
				507	tonal->mem[i] = BFCC[i];
				508	}
				509	for (i=0;i<9;i++)
				510	features[11+i] = (float)sqrt(tonal->std[i]);
				511	features[20] = info->tonality;
				512	features[21] = info->activity;
				513	features[22] = frame_stationarity;
				514	features[23] = info->tonality_slope;
				515	features[24] = tonal->lowECount;
				516
				517	#ifndef DISABLE_FLOAT_API
				518	mlp_process(&net, features, frame_probs);
				519	frame_probs[0] = .5f*(frame_probs[0]+1);
				520	/* Curve fitting between the MLP probability and the actual probability */
				521	frame_probs[0] = .01f + 1.21fframe_probs[0]frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
				522	/* Probability of active audio (as opposed to silence) */
				523	frame_probs[1] = .5f*frame_probs[1]+.5f;
				524	/* Consider that silence has a 50-50 probability. */
				525	frame_probs[0] = frame_probs[1]frame_probs[0] + (1-frame_probs[1]).5f;
				526
				527	/printf("%f %f ", frame_probs[0], frame_probs[1]);/
				528	{
				529	/* Probability of state transition */
				530	float tau;
				531	/* Represents independence of the MLP probabilities, where
				532	beta=1 means fully independent. */
				533	float beta;
				534	/* Denormalized probability of speech (p0) and music (p1) after update */
				535	float p0, p1;
				536	/* Probabilities for "all speech" and "all music" */
				537	float s0, m0;
				538	/* Probability sum for renormalisation */
				539	float psum;
				540	/* Instantaneous probability of speech and music, with beta pre-applied. */
				541	float speech0;
				542	float music0;
				543
				544	/* One transition every 3 minutes of active audio */
				545	tau = .00005f*frame_probs[1];
				546	beta = .05f;
				547	if (1) {
				548	/* Adapt beta based on how "unexpected" the new prob is */
				549	float p, q;
				550	p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
				551	q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
				552	beta = .01f+.05fABS16(p-q)/(p(1-q)+q*(1-p));
				553	}
				554	/* p0 and p1 are the probabilities of speech and music at this frame
				555	using only information from previous frame and applying the
				556	state transition model */
				557	p0 = (1-tonal->music_prob)(1-tau) + tonal->music_prob tau;
				558	p1 = tonal->music_prob (1-tau) + (1-tonal->music_prob)tau;
				559	/* We apply the current probability with exponent beta to work around
				560	the fact that the probability estimates aren't independent. */
				561	p0 *= (float)pow(1-frame_probs[0], beta);
				562	p1 *= (float)pow(frame_probs[0], beta);
				563	/* Normalise the probabilities to get the Marokv probability of music. */
				564	tonal->music_prob = p1/(p0+p1);
				565	info->music_prob = tonal->music_prob;
				566
				567	/* This chunk of code deals with delayed decision. */
				568	psum=1e-20f;
				569	/* Instantaneous probability of speech and music, with beta pre-applied. */
				570	speech0 = (float)pow(1-frame_probs[0], beta);
				571	music0 = (float)pow(frame_probs[0], beta);
				572	if (tonal->count==1)
				573	{
				574	tonal->pspeech[0]=.5;
				575	tonal->pmusic [0]=.5;
				576	}
				577	/* Updated probability of having only speech (s0) or only music (m0),
				578	before considering the new observation. */
				579	s0 = tonal->pspeech[0] + tonal->pspeech[1];
				580	m0 = tonal->pmusic [0] + tonal->pmusic [1];
				581	/* Updates s0 and m0 with instantaneous probability. */
				582	tonal->pspeech[0] = s0(1-tau)speech0;
				583	tonal->pmusic [0] = m0(1-tau)music0;
				584	/* Propagate the transition probabilities */
				585	for (i=1;i<DETECT_SIZE-1;i++)
				586	{
				587	tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
				588	tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
				589	}
				590	/* Probability that the latest frame is speech, when all the previous ones were music. */
				591	tonal->pspeech[DETECT_SIZE-1] = m0tauspeech0;
				592	/* Probability that the latest frame is music, when all the previous ones were speech. */
				593	tonal->pmusic [DETECT_SIZE-1] = s0taumusic0;
				594
				595	/* Renormalise probabilities to 1 */
				596	for (i=0;i<DETECT_SIZE;i++)
				597	psum += tonal->pspeech[i] + tonal->pmusic[i];
				598	psum = 1.f/psum;
				599	for (i=0;i<DETECT_SIZE;i++)
				600	{
				601	tonal->pspeech[i] *= psum;
				602	tonal->pmusic [i] *= psum;
				603	}
				604	psum = tonal->pmusic[0];
				605	for (i=1;i<DETECT_SIZE;i++)
				606	psum += tonal->pspeech[i];
				607
				608	/* Estimate our confidence in the speech/music decisions */
				609	if (frame_probs[1]>.75)
				610	{
				611	if (tonal->music_prob>.9)
				612	{
				613	float adapt;
				614	adapt = 1.f/(++tonal->music_confidence_count);
				615	tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
				616	tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
				617	}
				618	if (tonal->music_prob<.1)
				619	{
				620	float adapt;
				621	adapt = 1.f/(++tonal->speech_confidence_count);
				622	tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
				623	tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
				624	}
				625	} else {
				626	if (tonal->music_confidence_count==0)
				627	tonal->music_confidence = .9f;
				628	if (tonal->speech_confidence_count==0)
				629	tonal->speech_confidence = .1f;
				630	}
				631	}
				632	if (tonal->last_music != (tonal->music_prob>.5f))
				633	tonal->last_transition=0;
				634	tonal->last_music = tonal->music_prob>.5f;
				635	#else
				636	info->music_prob = 0;
				637	#endif
				638	/*for (i=0;i<25;i++)
				639	printf("%f ", features[i]);
				640	printf("\n");*/
				641
				642	info->bandwidth = bandwidth;
				643	/printf("%d %d\n", info->bandwidth, info->opus_bandwidth);/
				644	info->noisiness = frame_noisiness;
				645	info->valid = 1;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	646	RESTORE_STACK;
				647	}
				648
				649	void run_analysis(TonalityAnalysisState analysis, const CELTMode celt_mode, const void *analysis_pcm,
				650	int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
				651	int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
				652	{
				653	int offset;
				654	int pcm_len;
				655
				656	if (analysis_pcm != NULL)
				657	{
				658	/* Avoid overflow/wrap-around of the analysis buffer */
				659	analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
				660
				661	pcm_len = analysis_frame_size - analysis->analysis_offset;
				662	offset = analysis->analysis_offset;
				663	do {
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	664	tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	665	offset += 480;
				666	pcm_len -= 480;
				667	} while (pcm_len>0);
				668	analysis->analysis_offset = analysis_frame_size;
				669
				670	analysis->analysis_offset -= frame_size;
				671	}
				672
				673	analysis_info->valid = 0;
				674	tonality_get_info(analysis, analysis_info, frame_size);
				675	}