/* Copyright (c) 2011 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "kiss_fft.h"
33#include "celt.h"
34#include "modes.h"
35#include "arch.h"
36#include "quant_bands.h"
37#include <stdio.h>
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +080038#ifndef FIXED_POINT
39#include "mlp.c"
40#include "mlp_data.c"
41#endif
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050042
/* Fallback definition of pi, used only when no earlier header has
   already defined M_PI. */
#ifndef M_PI
#define M_PI 3.141592653
#endif

/* Cosine transform coefficients, stored as 8 rows of 16 columns.
   tonality_analysis() reads entry [i*16+b] to project the first 16 band
   log-energies onto 8 output coefficients (BFCC[i]).
   The values match the first 8 basis vectors of an orthonormal 16-point
   DCT-II: row 0 is 1/sqrt(16), and row k is
   sqrt(2/16)*cos(pi*k*(2*n+1)/32) for column n. */
float dct_table[128] = {
    /* row 0 */
    0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
    0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
    /* row 1 */
    0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
    -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
    /* row 2 */
    0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
    -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
    /* row 3 */
    0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
    0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
    /* row 4 */
    0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
    0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
    /* row 5 */
    0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
    -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
    /* row 6 */
    0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
    -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
    /* row 7 */
    0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
    0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
};

/* Number of past frames of band energy kept for the stationarity measure. */
#define NB_FRAMES 8

/* Number of analysis bands. */
#define NB_TBANDS 18
/* Band edges expressed as FFT bin indices: band b covers bins
   tbands[b] (inclusive) up to tbands[b+1] (exclusive). */
static const int tbands[NB_TBANDS+1] = {
    2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
};

/* Bands below this index are left out of the frame tonality sum
   (0 means every band contributes). */
#define NB_TONAL_SKIP_BANDS 0

Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050075typedef struct {
76 float angle[240];
77 float d_angle[240];
78 float d2_angle[240];
79 float prev_band_tonality[NB_TBANDS];
80 float prev_tonality;
81 float E[NB_FRAMES][NB_TBANDS];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +080082 float lowE[NB_TBANDS], highE[NB_TBANDS];
Jean-Marc Valina2054572011-11-25 23:07:46 -050083 float meanE[NB_TBANDS], meanRE[NB_TBANDS];
Jean-Marc Valin73eb3632011-11-16 10:47:15 +080084 float mem[32];
Jean-Marc Valinac3ac4a2011-11-21 14:35:41 -050085 float cmean[8];
Jean-Marc Valin747c8172011-11-22 22:44:56 -050086 float std[9];
87 float music_prob;
Jean-Marc Valin7609b672011-11-23 13:52:44 -050088 float Etracker;
89 float lowECount;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050090 int E_count;
Jean-Marc Valin7609b672011-11-23 13:52:44 -050091 int last_music;
92 int last_transition;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +080093 int count;
Jean-Marc Valina2054572011-11-25 23:07:46 -050094 int opus_bandwidth;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050095} TonalityAnalysisState;
96
Jean-Marc Valine9c353a2011-11-14 17:58:29 +080097void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -050098{
99 int i, b;
100 const CELTMode *mode;
101 const kiss_fft_state *kfft;
102 kiss_fft_cpx in[480], out[480];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500103 int N = 480, N2=240;
104 float * restrict A = tonal->angle;
105 float * restrict dA = tonal->d_angle;
106 float * restrict d2A = tonal->d2_angle;
107 float tonality[240];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800108 float noisiness[240];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500109 float band_tonality[NB_TBANDS];
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800110 float logE[NB_TBANDS];
111 float BFCC[8];
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500112 float features[100];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500113 float frame_tonality;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800114 float frame_noisiness;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500115 const float pi4 = M_PI*M_PI*M_PI*M_PI;
116 float slope=0;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800117 float frame_stationarity;
118 float relativeE;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800119 float frame_prob;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500120 float alpha, alphaE, alphaE2;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500121 float frame_loudness;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500122 float bandwidth_mask;
123 int bandwidth=0;
124 float bandE[NB_TBANDS];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500125 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
126
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500127 tonal->last_transition++;
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500128 alpha = 1.f/IMIN(20, 1+tonal->count);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500129 alphaE = 1.f/IMIN(50, 1+tonal->count);
Jean-Marc Valina2054572011-11-25 23:07:46 -0500130 alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500131
132 if (tonal->count<4)
133 tonal->music_prob = .5;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500134 kfft = mode->mdct.kfft[0];
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500135 if (C==1)
136 {
137 for (i=0;i<N2;i++)
138 {
139 float w = .5-.5*cos(M_PI*(i+1)/N2);
140 in[i].r = MULT16_16(w, x[i]);
141 in[i].i = MULT16_16(w, x[N-N2+i]);
142 in[N-i-1].r = MULT16_16(w, x[N-i-1]);
143 in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
144 }
145 } else {
146 for (i=0;i<N2;i++)
147 {
148 float w = .5-.5*cos(M_PI*(i+1)/N2);
149 in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
150 in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
151 in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
152 in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
153 }
154 }
155 opus_fft(kfft, in, out);
156
157 for (i=1;i<N2;i++)
158 {
159 float X1r, X2r, X1i, X2i;
160 float angle, d_angle, d2_angle;
161 float angle2, d_angle2, d2_angle2;
162 float mod1, mod2, avg_mod;
163 X1r = out[i].r+out[N-i].r;
164 X1i = out[i].i-out[N-i].i;
165 X2r = out[i].i+out[N-i].i;
166 X2i = out[N-i].r-out[i].r;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800167
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500168 angle = (.5/M_PI)*atan2(X1i, X1r);
169 d_angle = angle - A[i];
170 d2_angle = d_angle - dA[i];
171
172 angle2 = (.5/M_PI)*atan2(X2i, X2r);
173 d_angle2 = angle2 - angle;
174 d2_angle2 = d_angle2 - d_angle;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500175
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500176 mod1 = d2_angle - floor(.5+d2_angle);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800177 noisiness[i] = fabs(mod1);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500178 mod1 *= mod1;
179 mod1 *= mod1;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800180
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500181 mod2 = d2_angle2 - floor(.5+d2_angle2);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800182 noisiness[i] += fabs(mod2);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500183 mod2 *= mod2;
184 mod2 *= mod2;
185
186 avg_mod = .25*(d2A[i]+2*mod1+mod2);
187 tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015;
188
189 A[i] = angle2;
190 dA[i] = d_angle2;
191 d2A[i] = mod2;
192 }
193
194 frame_tonality = 0;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800195 info->activity = 0;
196 frame_noisiness = 0;
197 frame_stationarity = 0;
198 if (!tonal->count)
199 {
200 for (b=0;b<NB_TBANDS;b++)
201 {
202 tonal->lowE[b] = 1e10;
203 tonal->highE[b] = -1e10;
204 }
205 }
206 relativeE = 0;
207 info->boost_amount[0]=info->boost_amount[1]=0;
208 info->boost_band[0]=info->boost_band[1]=0;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500209 frame_loudness = 0;
Jean-Marc Valina2054572011-11-25 23:07:46 -0500210 bandwidth_mask = 0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500211 for (b=0;b<NB_TBANDS;b++)
212 {
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800213 float E=0, tE=0, nE=0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500214 float L1, L2;
215 float stationarity;
216 for (i=tbands[b];i<tbands[b+1];i++)
217 {
218 float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
219 + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
220 E += binE;
221 tE += binE*tonality[i];
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800222 nE += binE*2*(.5-noisiness[i]);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500223 }
Jean-Marc Valina2054572011-11-25 23:07:46 -0500224 bandE[b] = E;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500225 tonal->E[tonal->E_count][b] = E;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800226 frame_noisiness += nE/(1e-15+E);
227
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500228 frame_loudness += sqrt(E+1e-10);
229 /* Add a reasonable noise floor */
Jean-Marc Valina2054572011-11-25 23:07:46 -0500230 tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
231 tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
232 /* 13 dB slope for spreading function */
233 bandwidth_mask = MAX32(.05*bandwidth_mask, E);
234 /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
235 if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
236 bandwidth = b;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500237 logE[b] = log(E+1e-10);
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800238 tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
239 tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800240 if (tonal->highE[b] < tonal->lowE[b]+1)
241 {
242 tonal->highE[b]+=.5;
243 tonal->lowE[b]-=.5;
244 }
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800245 relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800246
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500247 L1=L2=0;
248 for (i=0;i<NB_FRAMES;i++)
249 {
250 L1 += sqrt(tonal->E[i][b]);
251 L2 += tonal->E[i][b];
252 }
253
254 stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2));
255 stationarity *= stationarity;
256 stationarity *= stationarity;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800257 frame_stationarity += stationarity;
258 /*band_tonality[b] = tE/(1e-15+E)*/;
259 band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800260 if (b>=NB_TONAL_SKIP_BANDS)
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500261 frame_tonality += band_tonality[b];
262 slope += band_tonality[b]*(b-8);
Jean-Marc Valin70d90d12011-11-28 14:17:47 -0500263 /*printf("%f %f ", band_tonality[b], stationarity);*/
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800264 if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500265 {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800266 if (band_tonality[b] > info->boost_amount[0])
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500267 {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800268 info->boost_amount[1] = info->boost_amount[0];
269 info->boost_band[1] = info->boost_band[0];
270 info->boost_amount[0] = band_tonality[b];
271 info->boost_band[0] = b;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500272 } else {
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800273 info->boost_amount[1] = band_tonality[b];
274 info->boost_band[1] = b;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500275 }
276 }
277 tonal->prev_band_tonality[b] = band_tonality[b];
278 }
Jean-Marc Valina2054572011-11-25 23:07:46 -0500279
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500280 frame_loudness = 20*log10(frame_loudness);
281 tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
282 tonal->lowECount *= (1-alphaE);
283 if (frame_loudness < tonal->Etracker-30)
284 tonal->lowECount += alphaE;
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800285
286 for (i=0;i<8;i++)
287 {
288 float sum=0;
289 for (b=0;b<16;b++)
290 sum += dct_table[i*16+b]*logE[b];
291 BFCC[i] = sum;
292 }
293
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800294 frame_stationarity /= NB_TBANDS;
295 relativeE /= NB_TBANDS;
296 if (tonal->count<10)
297 relativeE = .5;
298 frame_noisiness /= NB_TBANDS;
299#if 1
300 info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
301#else
302 info->activity = .5*(1+frame_noisiness-frame_stationarity);
303#endif
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800304 frame_tonality /= NB_TBANDS-NB_TONAL_SKIP_BANDS;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500305 frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500306 tonal->prev_tonality = frame_tonality;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800307 info->boost_amount[0] -= frame_tonality+.2;
308 info->boost_amount[1] -= frame_tonality+.2;
309 if (band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15
310 || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15)
311 info->boost_amount[0]=0;
312 if (band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15
313 || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15)
314 info->boost_amount[1]=0;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500315
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500316 slope /= 8*8;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800317 info->tonality_slope = slope;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500318
319 tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800320 tonal->count++;
321 info->tonality = frame_tonality;
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800322
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500323 for (i=0;i<4;i++)
Jean-Marc Valinac3ac4a2011-11-21 14:35:41 -0500324 features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i];
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500325
Jean-Marc Valincd213ea2011-11-21 21:57:10 -0500326 for (i=0;i<4;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500327 tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
328
329 for (i=0;i<4;i++)
330 features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]);
331 for (i=0;i<3;i++)
332 features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8];
Jean-Marc Valincd213ea2011-11-21 21:57:10 -0500333
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500334 if (tonal->count > 5)
335 {
336 for (i=0;i<9;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500337 tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500338 }
339
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800340 for (i=0;i<8;i++)
341 {
342 tonal->mem[i+24] = tonal->mem[i+16];
343 tonal->mem[i+16] = tonal->mem[i+8];
344 tonal->mem[i+8] = tonal->mem[i];
345 tonal->mem[i] = BFCC[i];
346 }
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500347 for (i=0;i<9;i++)
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500348 features[11+i] = sqrt(tonal->std[i]);
349 features[20] = info->tonality;
350 features[21] = info->activity;
351 features[22] = frame_stationarity;
352 features[23] = info->tonality_slope;
353 features[24] = tonal->lowECount;
354
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800355#ifndef FIXED_POINT
356 mlp_process(&net, features, &frame_prob);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500357 /* Adds a "probability dead zone", with a cap on certainty */
358 frame_prob = .90*frame_prob*frame_prob*frame_prob;
359
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800360 frame_prob = .5*(frame_prob+1);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500361
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800362 /*printf("%f\n", frame_prob);*/
363 {
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500364 float tau, beta;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800365 float p0, p1;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500366 float max_certainty;
367 /* One transition every 3 minutes */
368 tau = .00005;
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500369 beta = .1;
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500370 max_certainty = 1.f/(10+1*tonal->last_transition);
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500371 p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
372 p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800373 p0 *= pow(1-frame_prob, beta);
374 p1 *= pow(frame_prob, beta);
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500375 tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
Jean-Marc Valin747c8172011-11-22 22:44:56 -0500376 info->music_prob = tonal->music_prob;
377 /*printf("%f %f\n", frame_prob, info->music_prob);*/
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800378 }
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500379 if (tonal->last_music != (tonal->music_prob>.5))
380 tonal->last_transition=0;
381 tonal->last_music = tonal->music_prob>.5;
Jean-Marc Valin9987a3b2011-11-17 19:21:07 +0800382#else
383 info->music_prob = 0;
384#endif
Jean-Marc Valin7609b672011-11-23 13:52:44 -0500385 /*for (i=0;i<25;i++)
Jean-Marc Valin73eb3632011-11-16 10:47:15 +0800386 printf("%f ", features[i]);
387 printf("\n");*/
388
Jean-Marc Valina2054572011-11-25 23:07:46 -0500389 /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
390 if (bandwidth == NB_TBANDS-1 || tonal->count<100)
391 {
392 tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
393 } else {
394 int close_enough = 0;
395 if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
396 close_enough=1;
397 if (bandwidth<=11 || (bandwidth==12 && close_enough))
398 tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
399 else if (bandwidth<=13)
400 tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
401 else if (bandwidth<=15 || (bandwidth==16 && close_enough))
402 tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
403 }
Jean-Marc Valine9c353a2011-11-14 17:58:29 +0800404 info->valid = 1;
Jean-Marc Valin1a2e7652011-11-06 23:27:16 -0500405}