Blame - src/analysis.c - platform/external/libopus

blob: f4160e4b4ed5c114cf7d610dbd97a1569060a077 [file] [log] [blame]

Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	1	/* Copyright (c) 2011 Xiph.Org Foundation
				2	Written by Jean-Marc Valin */
				3	/*
				4	Redistribution and use in source and binary forms, with or without
				5	modification, are permitted provided that the following conditions
				6	are met:
				7
				8	- Redistributions of source code must retain the above copyright
				9	notice, this list of conditions and the following disclaimer.
				10
				11	- Redistributions in binary form must reproduce the above copyright
				12	notice, this list of conditions and the following disclaimer in the
				13	documentation and/or other materials provided with the distribution.
				14
				15	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				16	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				17	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				18	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
				19	CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				20	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				21	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				22	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				23	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				24	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				25	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				26	*/
				27
				28	#ifdef HAVE_CONFIG_H
				29	#include "config.h"
				30	#endif
				31
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	32	#define ANALYSIS_C
				33
				34	#include <stdio.h>
				35
				36	#include "mathops.h"
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	37	#include "kiss_fft.h"
				38	#include "celt.h"
				39	#include "modes.h"
				40	#include "arch.h"
				41	#include "quant_bands.h"
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	42	#include "analysis.h"
				43	#include "mlp.h"
				44	#include "stack_alloc.h"
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	45	#include "float_cast.h"
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	46
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	47	#ifndef M_PI
				48	#define M_PI 3.141592653
				49	#endif
				50
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	51	#ifndef DISABLE_FLOAT_API
				52
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	53	static const float dct_table[128] = {
				54	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				55	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				56	0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
				57	-0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
				58	0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
				59	-0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
				60	0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
				61	0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
				62	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				63	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				64	0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
				65	-0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
				66	0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
				67	-0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
				68	0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
				69	0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
				70	};
				71
				72	static const float analysis_window[240] = {
				73	0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
				74	0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
				75	0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
				76	0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
				77	0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
				78	0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
				79	0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
				80	0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
				81	0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
				82	0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
				83	0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
				84	0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
				85	0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
				86	0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
				87	0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
				88	0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
				89	0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
				90	0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
				91	0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
				92	0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
				93	0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
				94	0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
				95	0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
				96	0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
				97	0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
				98	0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
				99	0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
				100	0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
				101	0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
				102	0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
				103	};
				104
				105	static const int tbands[NB_TBANDS+1] = {
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	106	4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	107	};
				108
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	109	#define NB_TONAL_SKIP_BANDS 9
				110
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	111	static opus_val32 silk_resampler_down2_hp(
				112	opus_val32 S, / I/O State vector [ 2 ] */
				113	opus_val32 out, / O Output signal [ floor(len/2) ] */
				114	const opus_val32 in, / I Input signal [ len ] */
				115	int inLen /* I Number of input samples */
				116	)
				117	{
				118	int k, len2 = inLen/2;
				119	opus_val32 in32, out32, out32_hp, Y, X;
				120	opus_val64 hp_ener = 0;
				121	/* Internal variables and state are in Q10 format */
				122	for( k = 0; k < len2; k++ ) {
				123	/* Convert to Q10 */
				124	in32 = in[ 2 * k ];
				125
				126	/* All-pass section for even input sample */
				127	Y = SUB32( in32, S[ 0 ] );
				128	X = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y);
				129	out32 = ADD32( S[ 0 ], X );
				130	S[ 0 ] = ADD32( in32, X );
				131	out32_hp = out32;
				132	/* Convert to Q10 */
				133	in32 = in[ 2 * k + 1 ];
				134
				135	/* All-pass section for odd input sample, and add to output of previous section */
				136	Y = SUB32( in32, S[ 1 ] );
				137	X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
				138	out32 = ADD32( out32, S[ 1 ] );
				139	out32 = ADD32( out32, X );
				140	S[ 1 ] = ADD32( in32, X );
				141
				142	Y = SUB32( -in32, S[ 2 ] );
				143	X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
				144	out32_hp = ADD32( out32_hp, S[ 2 ] );
				145	out32_hp = ADD32( out32_hp, X );
				146	S[ 2 ] = ADD32( -in32, X );
				147
				148	hp_ener += out32_hp*(opus_val64)out32_hp;
				149	/* Add, convert back to int16 and store to output */
				150	out[ k ] = HALF32(out32);
				151	}
				152	#ifdef FIXED_POINT
				153	/* len2 can be up to 480, so we shift by 8 more to make it fit. */
				154	hp_ener = hp_ener >> (2*SIG_SHIFT + 8);
				155	#endif
				156	return (opus_val32)hp_ener;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	157	}
				158
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	159	static opus_val32 downmix_and_resample(downmix_func downmix, const void _x, opus_val32 y, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C, int Fs)
				160	{
				161	VARDECL(opus_val32, tmp);
				162	opus_val32 scale;
				163	int j;
				164	opus_val32 ret = 0;
				165	SAVE_STACK;
				166
				167	if (subframe==0) return 0;
				168	if (Fs == 48000)
				169	{
				170	subframe *= 2;
				171	offset *= 2;
				172	} else if (Fs == 16000) {
				173	subframe = subframe*2/3;
				174	offset = offset*2/3;
				175	}
				176	ALLOC(tmp, subframe, opus_val32);
				177
				178	downmix(_x, tmp, subframe, offset, c1, c2, C);
				179	#ifdef FIXED_POINT
				180	scale = (1<<SIG_SHIFT);
				181	#else
				182	scale = 1.f/32768;
				183	#endif
				184	if (c2==-2)
				185	scale /= C;
				186	else if (c2>-1)
				187	scale /= 2;
				188	for (j=0;j<subframe;j++)
				189	tmp[j] *= scale;
				190	if (Fs == 48000)
				191	{
				192	ret = silk_resampler_down2_hp(S, y, tmp, subframe);
				193	} else if (Fs == 24000) {
				194	OPUS_COPY(y, tmp, subframe);
				195	} else if (Fs == 16000) {
				196	VARDECL(opus_val32, tmp3x);
				197	ALLOC(tmp3x, 3*subframe, opus_val32);
				198	/* Don't do this at home! This resampler is horrible and it's only (barely)
				199	usable for the purpose of the analysis because we don't care about all
				200	the aliasing between 8 kHz and 12 kHz. */
				201	for (j=0;j<subframe;j++)
				202	{
				203	tmp3x[3*j] = tmp[j];
				204	tmp3x[3*j+1] = tmp[j];
				205	tmp3x[3*j+2] = tmp[j];
				206	}
				207	silk_resampler_down2_hp(S, y, tmp3x, 3*subframe);
				208	}
				209	RESTORE_STACK;
				210	return ret;
				211	}
				212
				213	void tonality_analysis_init(TonalityAnalysisState *tonal, opus_int32 Fs)
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	214	{
				215	/* Initialize reusable fields. */
				216	tonal->arch = opus_select_arch();
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	217	tonal->Fs = Fs;
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	218	/* Clear remaining fields. */
				219	tonality_analysis_reset(tonal);
				220	}
				221
				222	void tonality_analysis_reset(TonalityAnalysisState *tonal)
				223	{
				224	/* Clear non-reusable fields. */
				225	char start = (char)&tonal->TONALITY_ANALYSIS_RESET_START;
				226	OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	227	tonal->music_confidence = .9f;
				228	tonal->speech_confidence = .1f;
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	229	}
				230
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	231	void tonality_get_info(TonalityAnalysisState tonal, AnalysisInfo info_out, int len)
				232	{
				233	int pos;
				234	int curr_lookahead;
				235	float psum;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	236	float tonality_max;
				237	float tonality_avg;
				238	int tonality_count;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	239	int i;
				240
				241	pos = tonal->read_pos;
				242	curr_lookahead = tonal->write_pos-tonal->read_pos;
				243	if (curr_lookahead<0)
				244	curr_lookahead += DETECT_SIZE;
				245
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	246	/* On long frames, look at the second analysis window rather than the first. */
				247	if (len > tonal->Fs/50 && pos != tonal->write_pos)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	248	{
				249	pos++;
				250	if (pos==DETECT_SIZE)
				251	pos=0;
				252	}
				253	if (pos == tonal->write_pos)
				254	pos--;
				255	if (pos<0)
				256	pos = DETECT_SIZE-1;
				257	OPUS_COPY(info_out, &tonal->info[pos], 1);
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	258	tonality_max = tonality_avg = info_out->tonality;
				259	tonality_count = 1;
				260	/* If possible, look ahead for a tone to compensate for the delay in the tone detector. */
				261	for (i=0;i<3;i++)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	262	{
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	263	pos++;
				264	if (pos==DETECT_SIZE)
				265	pos = 0;
				266	if (pos == tonal->write_pos)
				267	break;
				268	tonality_max = MAX32(tonality_max, tonal->info[pos].tonality);
				269	tonality_avg += tonal->info[pos].tonality;
				270	tonality_count++;
				271	}
				272	info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
				273	tonal->read_subframe += len/(tonal->Fs/400);
				274	while (tonal->read_subframe>=8)
				275	{
				276	tonal->read_subframe -= 8;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	277	tonal->read_pos++;
				278	}
				279	if (tonal->read_pos>=DETECT_SIZE)
				280	tonal->read_pos-=DETECT_SIZE;
				281
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	282	/* The -1 is to compensate for the delay in the features themselves. */
				283	curr_lookahead = IMAX(curr_lookahead-1, 0);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	284
				285	psum=0;
				286	/* Summing the probability of transition patterns that involve music at
				287	time (DETECT_SIZE-curr_lookahead-1) */
				288	for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
				289	psum += tonal->pmusic[i];
				290	for (;i<DETECT_SIZE;i++)
				291	psum += tonal->pspeech[i];
				292	psum = psumtonal->music_confidence + (1-psum)tonal->speech_confidence;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	293	/printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);/
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	294
				295	info_out->music_prob = psum;
				296	}
				297
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	298	static const float std_feature_bias[9] = {
				299	5.684947f, 3.475288f, 1.770634f, 1.599784f, 3.773215f,
				300	2.163313f, 1.260756f, 1.116868f, 1.918795f
				301	};
				302
				303	#define LEAKAGE_OFFSET 2.5f
				304	#define LEAKAGE_SLOPE 2.f
				305
				306	#ifdef FIXED_POINT
				307	/* For fixed-point, the input is +/-2^15 shifted up by SIG_SHIFT, so we need to
				308	compensate for that in the energy. */
				309	#define SCALE_COMPENS (1.f/((opus_int32)1<<(15+SIG_SHIFT)))
				310	#define SCALE_ENER(e) ((SCALE_COMPENSSCALE_COMPENS)(e))
				311	#else
				312	#define SCALE_ENER(e) (e)
				313	#endif
				314
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	315	static void tonality_analysis(TonalityAnalysisState tonal, const CELTMode celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	316	{
				317	int i, b;
				318	const kiss_fft_state *kfft;
				319	VARDECL(kiss_fft_cpx, in);
				320	VARDECL(kiss_fft_cpx, out);
				321	int N = 480, N2=240;
				322	float * OPUS_RESTRICT A = tonal->angle;
				323	float * OPUS_RESTRICT dA = tonal->d_angle;
				324	float * OPUS_RESTRICT d2A = tonal->d2_angle;
				325	VARDECL(float, tonality);
				326	VARDECL(float, noisiness);
				327	float band_tonality[NB_TBANDS];
				328	float logE[NB_TBANDS];
				329	float BFCC[8];
				330	float features[25];
				331	float frame_tonality;
				332	float max_frame_tonality;
				333	/float tw_sum=0;/
				334	float frame_noisiness;
				335	const float pi4 = (float)(M_PIM_PIM_PI*M_PI);
				336	float slope=0;
				337	float frame_stationarity;
				338	float relativeE;
				339	float frame_probs[2];
				340	float alpha, alphaE, alphaE2;
				341	float frame_loudness;
				342	float bandwidth_mask;
				343	int bandwidth=0;
				344	float maxE = 0;
				345	float noise_floor;
				346	int remaining;
				347	AnalysisInfo *info;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	348	float hp_ener;
				349	float tonality2[240];
				350	float midE[8];
				351	float spec_variability=0;
				352	float band_log2[NB_TBANDS+1];
				353	float leakage_from[NB_TBANDS+1];
				354	float leakage_to[NB_TBANDS+1];
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	355	SAVE_STACK;
				356
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	357	alpha = 1.f/IMIN(10, 1+tonal->count);
				358	alphaE = 1.f/IMIN(25, 1+tonal->count);
				359	alphaE2 = 1.f/IMIN(500, 1+tonal->count);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	360
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	361	if (tonal->Fs == 48000)
				362	{
				363	/* len and offset are now at 24 kHz. */
				364	len/= 2;
				365	offset /= 2;
				366	} else if (tonal->Fs == 16000) {
				367	len = 3*len/2;
				368	offset = 3*offset/2;
				369	}
				370
				371	if (tonal->count<4) {
				372	if (tonal->application == OPUS_APPLICATION_VOIP)
				373	tonal->music_prob = .1f;
				374	else
				375	tonal->music_prob = .625f;
				376	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	377	kfft = celt_mode->mdct.kfft[0];
				378	if (tonal->count==0)
				379	tonal->mem_fill = 240;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	380	tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x,
				381	&tonal->inmem[tonal->mem_fill], tonal->downmix_state,
				382	IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	383	if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
				384	{
				385	tonal->mem_fill += len;
				386	/* Don't have enough to update the analysis */
				387	RESTORE_STACK;
				388	return;
				389	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	390	hp_ener = tonal->hp_ener_accum;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	391	info = &tonal->info[tonal->write_pos++];
				392	if (tonal->write_pos>=DETECT_SIZE)
				393	tonal->write_pos-=DETECT_SIZE;
				394
				395	ALLOC(in, 480, kiss_fft_cpx);
				396	ALLOC(out, 480, kiss_fft_cpx);
				397	ALLOC(tonality, 240, float);
				398	ALLOC(noisiness, 240, float);
				399	for (i=0;i<N2;i++)
				400	{
				401	float w = analysis_window[i];
				402	in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
				403	in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
				404	in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
				405	in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
				406	}
				407	OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
				408	remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	409	tonal->hp_ener_accum = (float)downmix_and_resample(downmix, x,
				410	&tonal->inmem[240], tonal->downmix_state, remaining,
				411	offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C, tonal->Fs);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	412	tonal->mem_fill = 240 + remaining;
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	413	opus_fft(kfft, in, out, tonal->arch);
				414	#ifndef FIXED_POINT
				415	/* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
				416	if (celt_isnan(out[0].r))
				417	{
				418	info->valid = 0;
				419	RESTORE_STACK;
				420	return;
				421	}
				422	#endif
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	423
				424	for (i=1;i<N2;i++)
				425	{
				426	float X1r, X2r, X1i, X2i;
				427	float angle, d_angle, d2_angle;
				428	float angle2, d_angle2, d2_angle2;
				429	float mod1, mod2, avg_mod;
				430	X1r = (float)out[i].r+out[N-i].r;
				431	X1i = (float)out[i].i-out[N-i].i;
				432	X2r = (float)out[i].i+out[N-i].i;
				433	X2i = (float)out[N-i].r-out[i].r;
				434
				435	angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
				436	d_angle = angle - A[i];
				437	d2_angle = d_angle - dA[i];
				438
				439	angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
				440	d_angle2 = angle2 - angle;
				441	d2_angle2 = d_angle2 - d_angle;
				442
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	443	mod1 = d2_angle - (float)float2int(d2_angle);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	444	noisiness[i] = ABS16(mod1);
				445	mod1 *= mod1;
				446	mod1 *= mod1;
				447
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	448	mod2 = d2_angle2 - (float)float2int(d2_angle2);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	449	noisiness[i] += ABS16(mod2);
				450	mod2 *= mod2;
				451	mod2 *= mod2;
				452
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	453	avg_mod = .25f(d2A[i]+mod1+2mod2);
				454	/* This introduces an extra delay of 2 frames in the detection. */
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	455	tonality[i] = 1.f/(1.f+40.f16.fpi4*avg_mod)-.015f;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	456	/* No delay on this detection, but it's less reliable. */
				457	tonality2[i] = 1.f/(1.f+40.f16.fpi4*mod2)-.015f;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	458
				459	A[i] = angle2;
				460	dA[i] = d_angle2;
				461	d2A[i] = mod2;
				462	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	463	for (i=2;i<N2-1;i++)
				464	{
				465	float tt = MIN32(tonality2[i], MAX32(tonality2[i-1], tonality2[i+1]));
				466	tonality[i] = .9f*MAX32(tonality[i], tt-.1f);
				467	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	468	frame_tonality = 0;
				469	max_frame_tonality = 0;
				470	/tw_sum = 0;/
				471	info->activity = 0;
				472	frame_noisiness = 0;
				473	frame_stationarity = 0;
				474	if (!tonal->count)
				475	{
				476	for (b=0;b<NB_TBANDS;b++)
				477	{
				478	tonal->lowE[b] = 1e10;
				479	tonal->highE[b] = -1e10;
				480	}
				481	}
				482	relativeE = 0;
				483	frame_loudness = 0;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	484	/* The energy of the very first band is special because of DC. */
				485	{
				486	float E = 0;
				487	float X1r, X2r;
				488	X1r = 2*(float)out[0].r;
				489	X2r = 2*(float)out[0].i;
				490	E = X1rX1r + X2rX2r;
				491	for (i=1;i<4;i++)
				492	{
				493	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				494	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				495	E += binE;
				496	}
				497	E = SCALE_ENER(E);
				498	band_log2[0] = .5f1.442695f(float)log(E+1e-10f);
				499	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	500	for (b=0;b<NB_TBANDS;b++)
				501	{
				502	float E=0, tE=0, nE=0;
				503	float L1, L2;
				504	float stationarity;
				505	for (i=tbands[b];i<tbands[b+1];i++)
				506	{
				507	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				508	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	509	binE = SCALE_ENER(binE);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	510	E += binE;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	511	tE += binE*MAX32(0, tonality[i]);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	512	nE += binE2.f(.5f-noisiness[i]);
				513	}
flim	c91ee5b	2016-01-26 14:33:44 +0100	[diff] [blame]	514	#ifndef FIXED_POINT
				515	/* Check for extreme band energies that could cause NaNs later. */
				516	if (!(E<1e9f) \|\| celt_isnan(E))
				517	{
				518	info->valid = 0;
				519	RESTORE_STACK;
				520	return;
				521	}
				522	#endif
				523
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	524	tonal->E[tonal->E_count][b] = E;
				525	frame_noisiness += nE/(1e-15f+E);
				526
				527	frame_loudness += (float)sqrt(E+1e-10f);
				528	logE[b] = (float)log(E+1e-10f);
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	529	band_log2[b+1] = .5f1.442695f(float)log(E+1e-10f);
				530	tonal->logE[tonal->E_count][b] = logE[b];
				531	if (tonal->count==0)
				532	tonal->highE[b] = tonal->lowE[b] = logE[b];
				533	if (tonal->highE[b] > tonal->lowE[b] + 7.5)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	534	{
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	535	if (tonal->highE[b] - logE[b] > logE[b] - tonal->lowE[b])
				536	tonal->highE[b] -= .01f;
				537	else
				538	tonal->lowE[b] += .01f;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	539	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	540	if (logE[b] > tonal->highE[b])
				541	{
				542	tonal->highE[b] = logE[b];
				543	tonal->lowE[b] = MAX32(tonal->highE[b]-15, tonal->lowE[b]);
				544	} else if (logE[b] < tonal->lowE[b])
				545	{
				546	tonal->lowE[b] = logE[b];
				547	tonal->highE[b] = MIN32(tonal->lowE[b]+15, tonal->highE[b]);
				548	}
				549	relativeE += (logE[b]-tonal->lowE[b])/(1e-15f + (tonal->highE[b]-tonal->lowE[b]));
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	550
				551	L1=L2=0;
				552	for (i=0;i<NB_FRAMES;i++)
				553	{
				554	L1 += (float)sqrt(tonal->E[i][b]);
				555	L2 += tonal->E[i][b];
				556	}
				557
				558	stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
				559	stationarity *= stationarity;
				560	stationarity *= stationarity;
				561	frame_stationarity += stationarity;
				562	/band_tonality[b] = tE/(1e-15+E)/;
				563	band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
				564	#if 0
				565	if (b>=NB_TONAL_SKIP_BANDS)
				566	{
				567	frame_tonality += tweight[b]*band_tonality[b];
				568	tw_sum += tweight[b];
				569	}
				570	#else
				571	frame_tonality += band_tonality[b];
				572	if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
				573	frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
				574	#endif
				575	max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f(b-NB_TBANDS))frame_tonality);
				576	slope += band_tonality[b]*(b-8);
				577	/printf("%f %f ", band_tonality[b], stationarity);/
				578	tonal->prev_band_tonality[b] = band_tonality[b];
				579	}
				580
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	581	leakage_from[0] = band_log2[0];
				582	leakage_to[0] = band_log2[0] - LEAKAGE_OFFSET;
				583	for (b=1;b<NB_TBANDS+1;b++)
				584	{
				585	float leak_slope = LEAKAGE_SLOPE*(tbands[b]-tbands[b-1])/4;
				586	leakage_from[b] = MIN16(leakage_from[b-1]+leak_slope, band_log2[b]);
				587	leakage_to[b] = MAX16(leakage_to[b-1]-leak_slope, band_log2[b]-LEAKAGE_OFFSET);
				588	}
				589	for (b=NB_TBANDS-2;b>=0;b--)
				590	{
				591	float leak_slope = LEAKAGE_SLOPE*(tbands[b+1]-tbands[b])/4;
				592	leakage_from[b] = MIN16(leakage_from[b+1]+leak_slope, leakage_from[b]);
				593	leakage_to[b] = MAX16(leakage_to[b+1]-leak_slope, leakage_to[b]);
				594	}
				595	celt_assert(NB_TBANDS+1 <= LEAK_BANDS);
				596	for (b=0;b<NB_TBANDS+1;b++)
				597	{
				598	/* leak_boost[] is made up of two terms. The first, based on leakage_to[],
				599	represents the boost needed to overcome the amount of analysis leakage
				600	cause in a weaker band b by louder neighbouring bands.
				601	The second, based on leakage_from[], applies to a loud band b for
				602	which the quantization noise causes synthesis leakage to the weaker
				603	neighbouring bands. */
				604	float boost = MAX16(0, leakage_to[b] - band_log2[b]) +
				605	MAX16(0, band_log2[b] - (leakage_from[b]+LEAKAGE_OFFSET));
				606	info->leak_boost[b] = IMIN(255, (int)floor(.5 + 64.f*boost));
				607	}
				608	for (;b<LEAK_BANDS;b++) info->leak_boost[b] = 0;
				609
				610	for (i=0;i<NB_FRAMES;i++)
				611	{
				612	int j;
				613	float mindist = 1e15f;
				614	for (j=0;j<NB_FRAMES;j++)
				615	{
				616	int k;
				617	float dist=0;
				618	for (k=0;k<NB_TBANDS;k++)
				619	{
				620	float tmp;
				621	tmp = tonal->logE[i][k] - tonal->logE[j][k];
				622	dist += tmp*tmp;
				623	}
				624	if (j!=i)
				625	mindist = MIN32(mindist, dist);
				626	}
				627	spec_variability += mindist;
				628	}
				629	spec_variability = (float)sqrt(spec_variability/NB_FRAMES/NB_TBANDS);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	630	bandwidth_mask = 0;
				631	bandwidth = 0;
				632	maxE = 0;
				633	noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	634	noise_floor *= noise_floor;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	635	for (b=0;b<NB_TBANDS;b++)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	636	{
				637	float E=0;
				638	int band_start, band_end;
				639	/* Keep a margin of 300 Hz for aliasing */
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	640	band_start = tbands[b];
				641	band_end = tbands[b+1];
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	642	for (i=band_start;i<band_end;i++)
				643	{
				644	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				645	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				646	E += binE;
				647	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	648	E = SCALE_ENER(E);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	649	maxE = MAX32(maxE, E);
				650	tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
				651	E = MAX32(E, tonal->meanE[b]);
				652	/* Use a simple follower with 13 dB/Bark slope for spreading function */
				653	bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
				654	/* Consider the band "active" only if all these conditions are met:
				655	1) less than 10 dB below the simple follower
				656	2) less than 90 dB below the peak band (maximal masking possible considering
				657	both the ATH and the loudness-dependent slope of the spreading function)
				658	3) above the PCM quantization noise floor
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	659	We use b+1 because the first CELT band isn't included in tbands[]
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	660	*/
				661	if (E>.1bandwidth_mask && E1e9f > maxE && E > noise_floor*(band_end-band_start))
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	662	bandwidth = b+1;
				663	}
				664	/* Special case for the last two bands, for which we don't have spectrum but only
				665	the energy above 12 kHz. */
				666	if (tonal->Fs == 48000) {
				667	float ratio;
				668	float E = hp_ener(1.f/(240240));
				669	ratio = tonal->prev_bandwidth==20 ? 0.03f : 0.07f;
				670	#ifdef FIXED_POINT
				671	/* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
				672	E = 256.f(1.f/Q15ONE)*(1.f/Q15ONE);
				673	#endif
				674	maxE = MAX32(maxE, E);
				675	tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
				676	E = MAX32(E, tonal->meanE[b]);
				677	/* Use a simple follower with 13 dB/Bark slope for spreading function */
				678	bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
				679	if (E>ratiobandwidth_mask && E1e9f > maxE && E > noise_floor*160)
				680	bandwidth = 20;
				681	/* This detector is unreliable, so if the bandwidth is close to SWB, assume it's FB. */
				682	if (bandwidth >= 17)
				683	bandwidth = 20;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	684	}
				685	if (tonal->count<=2)
				686	bandwidth = 20;
				687	frame_loudness = 20*(float)log10(frame_loudness);
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	688	tonal->Etracker = MAX32(tonal->Etracker-.003f, frame_loudness);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	689	tonal->lowECount *= (1-alphaE);
				690	if (frame_loudness < tonal->Etracker-30)
				691	tonal->lowECount += alphaE;
				692
				693	for (i=0;i<8;i++)
				694	{
				695	float sum=0;
				696	for (b=0;b<16;b++)
				697	sum += dct_table[i16+b]logE[b];
				698	BFCC[i] = sum;
				699	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	700	for (i=0;i<8;i++)
				701	{
				702	float sum=0;
				703	for (b=0;b<16;b++)
				704	sum += dct_table[i16+b].5f*(tonal->highE[b]+tonal->lowE[b]);
				705	midE[i] = sum;
				706	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	707
				708	frame_stationarity /= NB_TBANDS;
				709	relativeE /= NB_TBANDS;
				710	if (tonal->count<10)
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	711	relativeE = .5f;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	712	frame_noisiness /= NB_TBANDS;
				713	#if 1
				714	info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
				715	#else
				716	info->activity = .5*(1+frame_noisiness-frame_stationarity);
				717	#endif
				718	frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
				719	frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
				720	tonal->prev_tonality = frame_tonality;
				721
				722	slope /= 8*8;
				723	info->tonality_slope = slope;
				724
				725	tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	726	tonal->count = IMIN(tonal->count+1, ANALYSIS_COUNT_MAX);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	727	info->tonality = frame_tonality;
				728
				729	for (i=0;i<4;i++)
				730	features[i] = -0.12299f(BFCC[i]+tonal->mem[i+24]) + 0.49195f(tonal->mem[i]+tonal->mem[i+16]) + 0.69693ftonal->mem[i+8] - 1.4349ftonal->cmean[i];
				731
				732	for (i=0;i<4;i++)
				733	tonal->cmean[i] = (1-alpha)tonal->cmean[i] + alphaBFCC[i];
				734
				735	for (i=0;i<4;i++)
				736	features[4+i] = 0.63246f(BFCC[i]-tonal->mem[i+24]) + 0.31623f(tonal->mem[i]-tonal->mem[i+16]);
				737	for (i=0;i<3;i++)
				738	features[8+i] = 0.53452f(BFCC[i]+tonal->mem[i+24]) - 0.26726f(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
				739
				740	if (tonal->count > 5)
				741	{
				742	for (i=0;i<9;i++)
				743	tonal->std[i] = (1-alpha)tonal->std[i] + alphafeatures[i]*features[i];
				744	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	745	for (i=0;i<4;i++)
				746	features[i] = BFCC[i]-midE[i];
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	747
				748	for (i=0;i<8;i++)
				749	{
				750	tonal->mem[i+24] = tonal->mem[i+16];
				751	tonal->mem[i+16] = tonal->mem[i+8];
				752	tonal->mem[i+8] = tonal->mem[i];
				753	tonal->mem[i] = BFCC[i];
				754	}
				755	for (i=0;i<9;i++)
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	756	features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
				757	features[18] = spec_variability - 0.78f;
				758	features[20] = info->tonality - 0.154723f;
				759	features[21] = info->activity - 0.724643f;
				760	features[22] = frame_stationarity - 0.743717f;
				761	features[23] = info->tonality_slope + 0.069216f;
				762	features[24] = tonal->lowECount - 0.067930f;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	763
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	764	mlp_process(&net, features, frame_probs);
				765	frame_probs[0] = .5f*(frame_probs[0]+1);
				766	/* Curve fitting between the MLP probability and the actual probability */
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	767	/frame_probs[0] = .01f + 1.21fframe_probs[0]frame_probs[0] - .23f(float)pow(frame_probs[0], 10);*/
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	768	/* Probability of active audio (as opposed to silence) */
				769	frame_probs[1] = .5f*frame_probs[1]+.5f;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	770	frame_probs[1] *= frame_probs[1];
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	771
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	772	/* Probability of speech or music vs noise */
				773	info->activity_probability = frame_probs[1];
				774
				775	/printf("%f %f\n", frame_probs[0], frame_probs[1]);/
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	776	{
				777	/* Probability of state transition */
				778	float tau;
				779	/* Represents independence of the MLP probabilities, where
				780	beta=1 means fully independent. */
				781	float beta;
				782	/* Denormalized probability of speech (p0) and music (p1) after update */
				783	float p0, p1;
				784	/* Probabilities for "all speech" and "all music" */
				785	float s0, m0;
				786	/* Probability sum for renormalisation */
				787	float psum;
				788	/* Instantaneous probability of speech and music, with beta pre-applied. */
				789	float speech0;
				790	float music0;
Felicia Lim	d03c373	2016-07-25 20:28:37 +0200	[diff] [blame]	791	float p, q;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	792
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	793	/* More silence transitions for speech than for music. */
				794	tau = .001ftonal->music_prob + .01f(1-tonal->music_prob);
				795	p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
				796	q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
				797	beta = .02f+.05fABS16(p-q)/(p(1-q)+q*(1-p));
				798	/* p0 and p1 are the probabilities of speech and music at this frame
				799	using only information from previous frame and applying the
				800	state transition model */
				801	p0 = (1-tonal->vad_prob)(1-tau) + tonal->vad_prob tau;
				802	p1 = tonal->vad_prob (1-tau) + (1-tonal->vad_prob)tau;
				803	/* We apply the current probability with exponent beta to work around
				804	the fact that the probability estimates aren't independent. */
				805	p0 *= (float)pow(1-frame_probs[1], beta);
				806	p1 *= (float)pow(frame_probs[1], beta);
				807	/* Normalise the probabilities to get the Marokv probability of music. */
				808	tonal->vad_prob = p1/(p0+p1);
				809	info->vad_prob = tonal->vad_prob;
				810	/* Consider that silence has a 50-50 probability of being speech or music. */
				811	frame_probs[0] = tonal->vad_probframe_probs[0] + (1-tonal->vad_prob).5f;
				812
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	813	/* One transition every 3 minutes of active audio */
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	814	tau = .0001f;
Felicia Lim	d03c373	2016-07-25 20:28:37 +0200	[diff] [blame]	815	/* Adapt beta based on how "unexpected" the new prob is */
				816	p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
				817	q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	818	beta = .02f+.05fABS16(p-q)/(p(1-q)+q*(1-p));
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	819	/* p0 and p1 are the probabilities of speech and music at this frame
				820	using only information from previous frame and applying the
				821	state transition model */
				822	p0 = (1-tonal->music_prob)(1-tau) + tonal->music_prob tau;
				823	p1 = tonal->music_prob (1-tau) + (1-tonal->music_prob)tau;
				824	/* We apply the current probability with exponent beta to work around
				825	the fact that the probability estimates aren't independent. */
				826	p0 *= (float)pow(1-frame_probs[0], beta);
				827	p1 *= (float)pow(frame_probs[0], beta);
				828	/* Normalise the probabilities to get the Marokv probability of music. */
				829	tonal->music_prob = p1/(p0+p1);
				830	info->music_prob = tonal->music_prob;
				831
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	832	/printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);/
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	833	/* This chunk of code deals with delayed decision. */
				834	psum=1e-20f;
				835	/* Instantaneous probability of speech and music, with beta pre-applied. */
				836	speech0 = (float)pow(1-frame_probs[0], beta);
				837	music0 = (float)pow(frame_probs[0], beta);
				838	if (tonal->count==1)
				839	{
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	840	if (tonal->application == OPUS_APPLICATION_VOIP)
				841	tonal->pmusic[0] = .1f;
				842	else
				843	tonal->pmusic[0] = .625f;
				844	tonal->pspeech[0] = 1-tonal->pmusic[0];
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	845	}
				846	/* Updated probability of having only speech (s0) or only music (m0),
				847	before considering the new observation. */
				848	s0 = tonal->pspeech[0] + tonal->pspeech[1];
				849	m0 = tonal->pmusic [0] + tonal->pmusic [1];
				850	/* Updates s0 and m0 with instantaneous probability. */
				851	tonal->pspeech[0] = s0(1-tau)speech0;
				852	tonal->pmusic [0] = m0(1-tau)music0;
				853	/* Propagate the transition probabilities */
				854	for (i=1;i<DETECT_SIZE-1;i++)
				855	{
				856	tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
				857	tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
				858	}
				859	/* Probability that the latest frame is speech, when all the previous ones were music. */
				860	tonal->pspeech[DETECT_SIZE-1] = m0tauspeech0;
				861	/* Probability that the latest frame is music, when all the previous ones were speech. */
				862	tonal->pmusic [DETECT_SIZE-1] = s0taumusic0;
				863
				864	/* Renormalise probabilities to 1 */
				865	for (i=0;i<DETECT_SIZE;i++)
				866	psum += tonal->pspeech[i] + tonal->pmusic[i];
				867	psum = 1.f/psum;
				868	for (i=0;i<DETECT_SIZE;i++)
				869	{
				870	tonal->pspeech[i] *= psum;
				871	tonal->pmusic [i] *= psum;
				872	}
				873	psum = tonal->pmusic[0];
				874	for (i=1;i<DETECT_SIZE;i++)
				875	psum += tonal->pspeech[i];
				876
				877	/* Estimate our confidence in the speech/music decisions */
				878	if (frame_probs[1]>.75)
				879	{
				880	if (tonal->music_prob>.9)
				881	{
				882	float adapt;
				883	adapt = 1.f/(++tonal->music_confidence_count);
				884	tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
				885	tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
				886	}
				887	if (tonal->music_prob<.1)
				888	{
				889	float adapt;
				890	adapt = 1.f/(++tonal->speech_confidence_count);
				891	tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
				892	tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
				893	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	894	}
				895	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	896	tonal->last_music = tonal->music_prob>.5f;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	897	#ifdef MLP_TRAINING
				898	for (i=0;i<25;i++)
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	899	printf("%f ", features[i]);
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	900	printf("\n");
				901	#endif
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	902
				903	info->bandwidth = bandwidth;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	904	tonal->prev_bandwidth = bandwidth;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	905	/printf("%d %d\n", info->bandwidth, info->opus_bandwidth);/
				906	info->noisiness = frame_noisiness;
				907	info->valid = 1;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	908	RESTORE_STACK;
				909	}
				910
				911	void run_analysis(TonalityAnalysisState analysis, const CELTMode celt_mode, const void *analysis_pcm,
				912	int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
				913	int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
				914	{
				915	int offset;
				916	int pcm_len;
				917
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	918	analysis_frame_size -= analysis_frame_size&1;
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	919	if (analysis_pcm != NULL)
				920	{
				921	/* Avoid overflow/wrap-around of the analysis buffer */
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	922	analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/50, analysis_frame_size);
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	923
				924	pcm_len = analysis_frame_size - analysis->analysis_offset;
				925	offset = analysis->analysis_offset;
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	926	while (pcm_len>0) {
				927	tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(Fs/50, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
				928	offset += Fs/50;
				929	pcm_len -= Fs/50;
				930	}
Vignesh Venkatasubramanian	2bd8b54	2014-02-20 10:50:35 -0800	[diff] [blame]	931	analysis->analysis_offset = analysis_frame_size;
				932
				933	analysis->analysis_offset -= frame_size;
				934	}
				935
				936	analysis_info->valid = 0;
				937	tonality_get_info(analysis, analysis_info, frame_size);
				938	}
Felicia Lim	0c2090c	2017-07-05 17:36:56 -0700	[diff] [blame^]	939
				940	#endif /* DISABLE_FLOAT_API */