/* Copyright (c) 2011 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "kiss_fft.h"
#include "celt.h"
#include "modes.h"
#include "arch.h"
#include "quant_bands.h"
#include <stdio.h>
#include <math.h>
#ifndef FIXED_POINT
#include "mlp.c"
#include "mlp_data.c"
#endif

/* Fallback for C libraries that do not provide M_PI (it is not part of ISO C).
   Full double precision is used so that results do not depend on whether the
   platform's <math.h> happens to define the constant. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/* 8x16 transform matrix stored row-major: row k (0..7) holds the 16
   coefficients applied to the first 16 log band energies to produce the k-th
   cepstral-like coefficient (read as dct_table[k*16+b] in tonality_analysis()).
   The rows are the first eight basis vectors of an orthonormal 16-point
   DCT-II: row 0 is 1/sqrt(16), row k>0 is sqrt(2/16)*cos(pi*k*(2*b+1)/32).
   NOTE(review): the table is never written in this file and could be made
   `static const`; linkage is left unchanged here in case another translation
   unit references it. */
float dct_table[128] = {
   /* k = 0 */
    0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
    0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
   /* k = 1 */
    0.351851,  0.338330,  0.311806,  0.273300,  0.224292,  0.166664,  0.102631,  0.034654,
   -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
   /* k = 2 */
    0.346760,  0.293969,  0.196424,  0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
   -0.346760, -0.293969, -0.196424, -0.068975,  0.068975,  0.196424,  0.293969,  0.346760,
   /* k = 3 */
    0.338330,  0.224292,  0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
    0.102631,  0.273300,  0.351851,  0.311806,  0.166664, -0.034654, -0.224292, -0.338330,
   /* k = 4 */
    0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
    0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
   /* k = 5 */
    0.311806,  0.034654, -0.273300, -0.338330, -0.102631,  0.224292,  0.351851,  0.166664,
   -0.166664, -0.351851, -0.224292,  0.102631,  0.338330,  0.273300, -0.034654, -0.311806,
   /* k = 6 */
    0.293969, -0.068975, -0.346760, -0.196424,  0.196424,  0.346760,  0.068975, -0.293969,
   -0.293969,  0.068975,  0.346760,  0.196424, -0.196424, -0.346760, -0.068975,  0.293969,
   /* k = 7 */
    0.273300, -0.166664, -0.338330,  0.034654,  0.351851,  0.102631, -0.311806, -0.224292,
    0.224292,  0.311806, -0.102631, -0.351851, -0.034654,  0.338330,  0.166664, -0.273300,
};

/* Number of past frames of band energy kept for the stationarity measure. */
#define NB_FRAMES 8

/* Number of analysis bands. */
#define NB_TBANDS 18

/* Band edges in FFT bins: band b covers bins tbands[b] .. tbands[b+1]-1,
   hence NB_TBANDS+1 entries. */
static const int tbands[NB_TBANDS+1] = {
       2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
};

/* Per-band weight used when accumulating the frame tonality. One entry per
   band (not per band edge), so the array holds exactly NB_TBANDS values. */
static const float tweight[NB_TBANDS] = {
      .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
};

/* Bands with index below this value are left out of the frame tonality. */
#define NB_TONAL_SKIP_BANDS 0

Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050079typedef struct {
80 float angle[240];
81 float d_angle[240];
82 float d2_angle[240];
83 float prev_band_tonality[NB_TBANDS];
84 float prev_tonality;
85 float E[NB_FRAMES][NB_TBANDS];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +080086 float lowE[NB_TBANDS], highE[NB_TBANDS];
Jean-Marc Valina2054572011-11-25 23:07:46 -050087 float meanE[NB_TBANDS], meanRE[NB_TBANDS];
Jean-Marc Valin73eb3632011-11-16 10:47:15 +080088 float mem[32];
Jean-Marc Valinac3ac4a2011-11-21 14:35:41 -050089 float cmean[8];
Jean-Marc Valin747c8172011-11-22 22:44:56 -050090 float std[9];
91 float music_prob;
Jean-Marc Valin7609b672011-11-23 13:52:44 -050092 float Etracker;
93 float lowECount;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050094 int E_count;
Jean-Marc Valin7609b672011-11-23 13:52:44 -050095 int last_music;
96 int last_transition;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +080097 int count;
Jean-Marc Valina2054572011-11-25 23:07:46 -050098 int opus_bandwidth;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050099} TonalityAnalysisState;
100
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800101void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500102{
103 int i, b;
104 const CELTMode *mode;
105 const kiss_fft_state *kfft;
106 kiss_fft_cpx in[480], out[480];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500107 int N = 480, N2=240;
108 float * restrict A = tonal->angle;
109 float * restrict dA = tonal->d_angle;
110 float * restrict d2A = tonal->d2_angle;
111 float tonality[240];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800112 float noisiness[240];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500113 float band_tonality[NB_TBANDS];
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800114 float logE[NB_TBANDS];
115 float BFCC[8];
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500116 float features[100];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500117 float frame_tonality;
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500118 float max_frame_tonality;
119 float tw_sum=0;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800120 float frame_noisiness;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500121 const float pi4 = M_PI*M_PI*M_PI*M_PI;
122 float slope=0;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800123 float frame_stationarity;
124 float relativeE;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800125 float frame_prob;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500126 float alpha, alphaE, alphaE2;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500127 float frame_loudness;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500128 float bandwidth_mask;
129 int bandwidth=0;
130 float bandE[NB_TBANDS];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500131 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
132
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500133 tonal->last_transition++;
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500134 alpha = 1.f/IMIN(20, 1+tonal->count);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500135 alphaE = 1.f/IMIN(50, 1+tonal->count);
Jean-Marc Valina2054572011-11-25 23:07:46 -0500136 alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500137
138 if (tonal->count<4)
139 tonal->music_prob = .5;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500140 kfft = mode->mdct.kfft[0];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500141 if (C==1)
142 {
143 for (i=0;i<N2;i++)
144 {
145 float w = .5-.5*cos(M_PI*(i+1)/N2);
146 in[i].r = MULT16_16(w, x[i]);
147 in[i].i = MULT16_16(w, x[N-N2+i]);
148 in[N-i-1].r = MULT16_16(w, x[N-i-1]);
149 in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
150 }
151 } else {
152 for (i=0;i<N2;i++)
153 {
154 float w = .5-.5*cos(M_PI*(i+1)/N2);
155 in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
156 in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
157 in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
158 in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
159 }
160 }
161 opus_fft(kfft, in, out);
162
163 for (i=1;i<N2;i++)
164 {
165 float X1r, X2r, X1i, X2i;
166 float angle, d_angle, d2_angle;
167 float angle2, d_angle2, d2_angle2;
168 float mod1, mod2, avg_mod;
169 X1r = out[i].r+out[N-i].r;
170 X1i = out[i].i-out[N-i].i;
171 X2r = out[i].i+out[N-i].i;
172 X2i = out[N-i].r-out[i].r;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800173
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500174 angle = (.5/M_PI)*atan2(X1i, X1r);
175 d_angle = angle - A[i];
176 d2_angle = d_angle - dA[i];
177
178 angle2 = (.5/M_PI)*atan2(X2i, X2r);
179 d_angle2 = angle2 - angle;
180 d2_angle2 = d_angle2 - d_angle;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500181
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500182 mod1 = d2_angle - floor(.5+d2_angle);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800183 noisiness[i] = fabs(mod1);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500184 mod1 *= mod1;
185 mod1 *= mod1;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800186
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500187 mod2 = d2_angle2 - floor(.5+d2_angle2);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800188 noisiness[i] += fabs(mod2);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500189 mod2 *= mod2;
190 mod2 *= mod2;
191
192 avg_mod = .25*(d2A[i]+2*mod1+mod2);
193 tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015;
194
195 A[i] = angle2;
196 dA[i] = d_angle2;
197 d2A[i] = mod2;
198 }
199
200 frame_tonality = 0;
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500201 max_frame_tonality = 0;
202 tw_sum = 0;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800203 info->activity = 0;
204 frame_noisiness = 0;
205 frame_stationarity = 0;
206 if (!tonal->count)
207 {
208 for (b=0;b<NB_TBANDS;b++)
209 {
210 tonal->lowE[b] = 1e10;
211 tonal->highE[b] = -1e10;
212 }
213 }
214 relativeE = 0;
215 info->boost_amount[0]=info->boost_amount[1]=0;
216 info->boost_band[0]=info->boost_band[1]=0;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500217 frame_loudness = 0;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500218 bandwidth_mask = 0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500219 for (b=0;b<NB_TBANDS;b++)
220 {
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800221 float E=0, tE=0, nE=0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500222 float L1, L2;
223 float stationarity;
224 for (i=tbands[b];i<tbands[b+1];i++)
225 {
226 float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
227 + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
228 E += binE;
229 tE += binE*tonality[i];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800230 nE += binE*2*(.5-noisiness[i]);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500231 }
Jean-Marc Valina2054572011-11-25 23:07:46 -0500232 bandE[b] = E;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500233 tonal->E[tonal->E_count][b] = E;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800234 frame_noisiness += nE/(1e-15+E);
235
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500236 frame_loudness += sqrt(E+1e-10);
237 /* Add a reasonable noise floor */
Jean-Marc Valina2054572011-11-25 23:07:46 -0500238 tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
239 tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
240 /* 13 dB slope for spreading function */
241 bandwidth_mask = MAX32(.05*bandwidth_mask, E);
242 /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
243 if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
244 bandwidth = b;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500245 logE[b] = log(E+1e-10);
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800246 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
247 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800248 if (tonal->highE[b] < tonal->lowE[b]+1)
249 {
250 tonal->highE[b]+=.5;
251 tonal->lowE[b]-=.5;
252 }
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800253 relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800254
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500255 L1=L2=0;
256 for (i=0;i<NB_FRAMES;i++)
257 {
258 L1 += sqrt(tonal->E[i][b]);
259 L2 += tonal->E[i][b];
260 }
261
262 stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2));
263 stationarity *= stationarity;
264 stationarity *= stationarity;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800265 frame_stationarity += stationarity;
266 /*band_tonality[b] = tE/(1e-15+E)*/;
267 band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500268 //printf("%f ", band_tonality[b]);
269#if 1
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800270 if (b>=NB_TONAL_SKIP_BANDS)
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500271 {
272 frame_tonality += tweight[b]*band_tonality[b];
273 tw_sum += tweight[b];
274 }
275#else
276 frame_tonality += band_tonality[b];
277 if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
278 frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
279#endif
280 max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500281 slope += band_tonality[b]*(b-8);
Jean-Marc Valin70d90d12011-11-28 14:17:47 -0500282 /*printf("%f %f ", band_tonality[b], stationarity);*/
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800283 if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500284 {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800285 if (band_tonality[b] > info->boost_amount[0])
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500286 {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800287 info->boost_amount[1] = info->boost_amount[0];
288 info->boost_band[1] = info->boost_band[0];
289 info->boost_amount[0] = band_tonality[b];
290 info->boost_band[0] = b;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500291 } else {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800292 info->boost_amount[1] = band_tonality[b];
293 info->boost_band[1] = b;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500294 }
295 }
296 tonal->prev_band_tonality[b] = band_tonality[b];
297 }
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500298 //printf("\n");
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500299 frame_loudness = 20*log10(frame_loudness);
300 tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
301 tonal->lowECount *= (1-alphaE);
302 if (frame_loudness < tonal->Etracker-30)
303 tonal->lowECount += alphaE;
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800304
305 for (i=0;i<8;i++)
306 {
307 float sum=0;
308 for (b=0;b<16;b++)
309 sum += dct_table[i*16+b]*logE[b];
310 BFCC[i] = sum;
311 }
312
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800313 frame_stationarity /= NB_TBANDS;
314 relativeE /= NB_TBANDS;
315 if (tonal->count<10)
316 relativeE = .5;
317 frame_noisiness /= NB_TBANDS;
318#if 1
319 info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
320#else
321 info->activity = .5*(1+frame_noisiness-frame_stationarity);
322#endif
Jean-Marc Valin971b0552011-12-02 16:08:02 -0500323 frame_tonality = (max_frame_tonality/(tw_sum));
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500324 frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500325 tonal->prev_tonality = frame_tonality;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800326 info->boost_amount[0] -= frame_tonality+.2;
327 info->boost_amount[1] -= frame_tonality+.2;
328 if (band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15
329 || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15)
330 info->boost_amount[0]=0;
331 if (band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15
332 || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15)
333 info->boost_amount[1]=0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500334
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500335 slope /= 8*8;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800336 info->tonality_slope = slope;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500337
338 tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800339 tonal->count++;
340 info->tonality = frame_tonality;
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800341
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500342 for (i=0;i<4;i++)
Jean-Marc Valinac3ac4a2011-11-21 14:35:41 -0500343 features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i];
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500344
Jean-Marc Valincd213ea2011-11-21 21:57:10 -0500345 for (i=0;i<4;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500346 tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
347
348 for (i=0;i<4;i++)
349 features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]);
350 for (i=0;i<3;i++)
351 features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8];
Jean-Marc Valincd213ea2011-11-21 21:57:10 -0500352
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500353 if (tonal->count > 5)
354 {
355 for (i=0;i<9;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500356 tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500357 }
358
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800359 for (i=0;i<8;i++)
360 {
361 tonal->mem[i+24] = tonal->mem[i+16];
362 tonal->mem[i+16] = tonal->mem[i+8];
363 tonal->mem[i+8] = tonal->mem[i];
364 tonal->mem[i] = BFCC[i];
365 }
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500366 for (i=0;i<9;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500367 features[11+i] = sqrt(tonal->std[i]);
368 features[20] = info->tonality;
369 features[21] = info->activity;
370 features[22] = frame_stationarity;
371 features[23] = info->tonality_slope;
372 features[24] = tonal->lowECount;
373
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800374#ifndef FIXED_POINT
375 mlp_process(&net, features, &frame_prob);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500376 /* Adds a "probability dead zone", with a cap on certainty */
377 frame_prob = .90*frame_prob*frame_prob*frame_prob;
378
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800379 frame_prob = .5*(frame_prob+1);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500380
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800381 /*printf("%f\n", frame_prob);*/
382 {
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500383 float tau, beta;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800384 float p0, p1;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500385 float max_certainty;
386 /* One transition every 3 minutes */
387 tau = .00005;
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500388 beta = .1;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500389 max_certainty = 1.f/(10+1*tonal->last_transition);
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500390 p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
391 p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800392 p0 *= pow(1-frame_prob, beta);
393 p1 *= pow(frame_prob, beta);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500394 tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500395 info->music_prob = tonal->music_prob;
396 /*printf("%f %f\n", frame_prob, info->music_prob);*/
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800397 }
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500398 if (tonal->last_music != (tonal->music_prob>.5))
399 tonal->last_transition=0;
400 tonal->last_music = tonal->music_prob>.5;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800401#else
402 info->music_prob = 0;
403#endif
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500404 /*for (i=0;i<25;i++)
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800405 printf("%f ", features[i]);
406 printf("\n");*/
407
Jean-Marc Valina2054572011-11-25 23:07:46 -0500408 /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
409 if (bandwidth == NB_TBANDS-1 || tonal->count<100)
410 {
411 tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
412 } else {
413 int close_enough = 0;
414 if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
415 close_enough=1;
416 if (bandwidth<=11 || (bandwidth==12 && close_enough))
417 tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
418 else if (bandwidth<=13)
419 tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
420 else if (bandwidth<=15 || (bandwidth==16 && close_enough))
421 tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
422 }
Jean-Marc Valin2a9fdbc2011-12-07 17:39:40 -0500423 info->noisiness = frame_noisiness;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800424 info->valid = 1;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500425}