Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 1 | /* Copyright (c) 2011 Xiph.Org Foundation |
| 2 | Written by Jean-Marc Valin */ |
| 3 | /* |
| 4 | Redistribution and use in source and binary forms, with or without |
| 5 | modification, are permitted provided that the following conditions |
| 6 | are met: |
| 7 | |
| 8 | - Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | |
| 11 | - Redistributions in binary form must reproduce the above copyright |
| 12 | notice, this list of conditions and the following disclaimer in the |
| 13 | documentation and/or other materials provided with the distribution. |
| 14 | |
| 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #ifdef HAVE_CONFIG_H |
| 29 | #include "config.h" |
| 30 | #endif |
| 31 | |
| 32 | #include "kiss_fft.h" |
| 33 | #include "celt.h" |
| 34 | #include "modes.h" |
| 35 | #include "arch.h" |
| 36 | #include "quant_bands.h" |
| 37 | #include <stdio.h> |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 38 | #ifndef FIXED_POINT |
| 39 | #include "mlp.c" |
| 40 | #include "mlp_data.c" |
| 41 | #endif |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 42 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 43 | #ifndef M_PI |
| 44 | #define M_PI 3.141592653 |
| 45 | #endif |
| 46 | |
| 47 | float dct_table[128] = { |
| 48 | 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, |
| 49 | 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, |
| 50 | 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654, |
| 51 | -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851, |
| 52 | 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760, |
| 53 | -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760, |
| 54 | 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631, |
| 55 | 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330, |
| 56 | 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, |
| 57 | 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, |
| 58 | 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664, |
| 59 | -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806, |
| 60 | 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969, |
| 61 | -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969, |
| 62 | 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292, |
| 63 | 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300, |
| 64 | }; |
| 65 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 66 | #define NB_FRAMES 8 |
| 67 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 68 | #define NB_TBANDS 18 |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 69 | static const int tbands[NB_TBANDS+1] = { |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 70 | 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 71 | }; |
| 72 | |
Jean-Marc Valin | 70d90d1 | 2011-11-28 14:17:47 -0500 | [diff] [blame^] | 73 | #define NB_TONAL_SKIP_BANDS 0 |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 74 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 75 | typedef struct { |
| 76 | float angle[240]; |
| 77 | float d_angle[240]; |
| 78 | float d2_angle[240]; |
| 79 | float prev_band_tonality[NB_TBANDS]; |
| 80 | float prev_tonality; |
| 81 | float E[NB_FRAMES][NB_TBANDS]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 82 | float lowE[NB_TBANDS], highE[NB_TBANDS]; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 83 | float meanE[NB_TBANDS], meanRE[NB_TBANDS]; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 84 | float mem[32]; |
Jean-Marc Valin | ac3ac4a | 2011-11-21 14:35:41 -0500 | [diff] [blame] | 85 | float cmean[8]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 86 | float std[9]; |
| 87 | float music_prob; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 88 | float Etracker; |
| 89 | float lowECount; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 90 | int E_count; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 91 | int last_music; |
| 92 | int last_transition; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 93 | int count; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 94 | int opus_bandwidth; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 95 | } TonalityAnalysisState; |
| 96 | |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 97 | void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 98 | { |
| 99 | int i, b; |
| 100 | const CELTMode *mode; |
| 101 | const kiss_fft_state *kfft; |
| 102 | kiss_fft_cpx in[480], out[480]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 103 | int N = 480, N2=240; |
| 104 | float * restrict A = tonal->angle; |
| 105 | float * restrict dA = tonal->d_angle; |
| 106 | float * restrict d2A = tonal->d2_angle; |
| 107 | float tonality[240]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 108 | float noisiness[240]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 109 | float band_tonality[NB_TBANDS]; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 110 | float logE[NB_TBANDS]; |
| 111 | float BFCC[8]; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 112 | float features[100]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 113 | float frame_tonality; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 114 | float frame_noisiness; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 115 | const float pi4 = M_PI*M_PI*M_PI*M_PI; |
| 116 | float slope=0; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 117 | float frame_stationarity; |
| 118 | float relativeE; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 119 | float frame_prob; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 120 | float alpha, alphaE, alphaE2; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 121 | float frame_loudness; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 122 | float bandwidth_mask; |
| 123 | int bandwidth=0; |
| 124 | float bandE[NB_TBANDS]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 125 | celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); |
| 126 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 127 | tonal->last_transition++; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 128 | alpha = 1.f/IMIN(20, 1+tonal->count); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 129 | alphaE = 1.f/IMIN(50, 1+tonal->count); |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 130 | alphaE2 = 1.f/IMIN(6000, 1+tonal->count); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 131 | |
| 132 | if (tonal->count<4) |
| 133 | tonal->music_prob = .5; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 134 | kfft = mode->mdct.kfft[0]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 135 | if (C==1) |
| 136 | { |
| 137 | for (i=0;i<N2;i++) |
| 138 | { |
| 139 | float w = .5-.5*cos(M_PI*(i+1)/N2); |
| 140 | in[i].r = MULT16_16(w, x[i]); |
| 141 | in[i].i = MULT16_16(w, x[N-N2+i]); |
| 142 | in[N-i-1].r = MULT16_16(w, x[N-i-1]); |
| 143 | in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]); |
| 144 | } |
| 145 | } else { |
| 146 | for (i=0;i<N2;i++) |
| 147 | { |
| 148 | float w = .5-.5*cos(M_PI*(i+1)/N2); |
| 149 | in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]); |
| 150 | in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]); |
| 151 | in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]); |
| 152 | in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]); |
| 153 | } |
| 154 | } |
| 155 | opus_fft(kfft, in, out); |
| 156 | |
| 157 | for (i=1;i<N2;i++) |
| 158 | { |
| 159 | float X1r, X2r, X1i, X2i; |
| 160 | float angle, d_angle, d2_angle; |
| 161 | float angle2, d_angle2, d2_angle2; |
| 162 | float mod1, mod2, avg_mod; |
| 163 | X1r = out[i].r+out[N-i].r; |
| 164 | X1i = out[i].i-out[N-i].i; |
| 165 | X2r = out[i].i+out[N-i].i; |
| 166 | X2i = out[N-i].r-out[i].r; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 167 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 168 | angle = (.5/M_PI)*atan2(X1i, X1r); |
| 169 | d_angle = angle - A[i]; |
| 170 | d2_angle = d_angle - dA[i]; |
| 171 | |
| 172 | angle2 = (.5/M_PI)*atan2(X2i, X2r); |
| 173 | d_angle2 = angle2 - angle; |
| 174 | d2_angle2 = d_angle2 - d_angle; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 175 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 176 | mod1 = d2_angle - floor(.5+d2_angle); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 177 | noisiness[i] = fabs(mod1); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 178 | mod1 *= mod1; |
| 179 | mod1 *= mod1; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 180 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 181 | mod2 = d2_angle2 - floor(.5+d2_angle2); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 182 | noisiness[i] += fabs(mod2); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 183 | mod2 *= mod2; |
| 184 | mod2 *= mod2; |
| 185 | |
| 186 | avg_mod = .25*(d2A[i]+2*mod1+mod2); |
| 187 | tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015; |
| 188 | |
| 189 | A[i] = angle2; |
| 190 | dA[i] = d_angle2; |
| 191 | d2A[i] = mod2; |
| 192 | } |
| 193 | |
| 194 | frame_tonality = 0; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 195 | info->activity = 0; |
| 196 | frame_noisiness = 0; |
| 197 | frame_stationarity = 0; |
| 198 | if (!tonal->count) |
| 199 | { |
| 200 | for (b=0;b<NB_TBANDS;b++) |
| 201 | { |
| 202 | tonal->lowE[b] = 1e10; |
| 203 | tonal->highE[b] = -1e10; |
| 204 | } |
| 205 | } |
| 206 | relativeE = 0; |
| 207 | info->boost_amount[0]=info->boost_amount[1]=0; |
| 208 | info->boost_band[0]=info->boost_band[1]=0; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 209 | frame_loudness = 0; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 210 | bandwidth_mask = 0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 211 | for (b=0;b<NB_TBANDS;b++) |
| 212 | { |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 213 | float E=0, tE=0, nE=0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 214 | float L1, L2; |
| 215 | float stationarity; |
| 216 | for (i=tbands[b];i<tbands[b+1];i++) |
| 217 | { |
| 218 | float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r |
| 219 | + out[i].i*out[i].i + out[N-i].i*out[N-i].i; |
| 220 | E += binE; |
| 221 | tE += binE*tonality[i]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 222 | nE += binE*2*(.5-noisiness[i]); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 223 | } |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 224 | bandE[b] = E; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 225 | tonal->E[tonal->E_count][b] = E; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 226 | frame_noisiness += nE/(1e-15+E); |
| 227 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 228 | frame_loudness += sqrt(E+1e-10); |
| 229 | /* Add a reasonable noise floor */ |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 230 | tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E; |
| 231 | tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E); |
| 232 | /* 13 dB slope for spreading function */ |
| 233 | bandwidth_mask = MAX32(.05*bandwidth_mask, E); |
| 234 | /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */ |
| 235 | if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask) |
| 236 | bandwidth = b; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 237 | logE[b] = log(E+1e-10); |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 238 | tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01); |
| 239 | tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 240 | if (tonal->highE[b] < tonal->lowE[b]+1) |
| 241 | { |
| 242 | tonal->highE[b]+=.5; |
| 243 | tonal->lowE[b]-=.5; |
| 244 | } |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 245 | relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 246 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 247 | L1=L2=0; |
| 248 | for (i=0;i<NB_FRAMES;i++) |
| 249 | { |
| 250 | L1 += sqrt(tonal->E[i][b]); |
| 251 | L2 += tonal->E[i][b]; |
| 252 | } |
| 253 | |
| 254 | stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2)); |
| 255 | stationarity *= stationarity; |
| 256 | stationarity *= stationarity; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 257 | frame_stationarity += stationarity; |
| 258 | /*band_tonality[b] = tE/(1e-15+E)*/; |
| 259 | band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 260 | if (b>=NB_TONAL_SKIP_BANDS) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 261 | frame_tonality += band_tonality[b]; |
| 262 | slope += band_tonality[b]*(b-8); |
Jean-Marc Valin | 70d90d1 | 2011-11-28 14:17:47 -0500 | [diff] [blame^] | 263 | /*printf("%f %f ", band_tonality[b], stationarity);*/ |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 264 | if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 265 | { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 266 | if (band_tonality[b] > info->boost_amount[0]) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 267 | { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 268 | info->boost_amount[1] = info->boost_amount[0]; |
| 269 | info->boost_band[1] = info->boost_band[0]; |
| 270 | info->boost_amount[0] = band_tonality[b]; |
| 271 | info->boost_band[0] = b; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 272 | } else { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 273 | info->boost_amount[1] = band_tonality[b]; |
| 274 | info->boost_band[1] = b; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 275 | } |
| 276 | } |
| 277 | tonal->prev_band_tonality[b] = band_tonality[b]; |
| 278 | } |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 279 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 280 | frame_loudness = 20*log10(frame_loudness); |
| 281 | tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); |
| 282 | tonal->lowECount *= (1-alphaE); |
| 283 | if (frame_loudness < tonal->Etracker-30) |
| 284 | tonal->lowECount += alphaE; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 285 | |
| 286 | for (i=0;i<8;i++) |
| 287 | { |
| 288 | float sum=0; |
| 289 | for (b=0;b<16;b++) |
| 290 | sum += dct_table[i*16+b]*logE[b]; |
| 291 | BFCC[i] = sum; |
| 292 | } |
| 293 | |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 294 | frame_stationarity /= NB_TBANDS; |
| 295 | relativeE /= NB_TBANDS; |
| 296 | if (tonal->count<10) |
| 297 | relativeE = .5; |
| 298 | frame_noisiness /= NB_TBANDS; |
| 299 | #if 1 |
| 300 | info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; |
| 301 | #else |
| 302 | info->activity = .5*(1+frame_noisiness-frame_stationarity); |
| 303 | #endif |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 304 | frame_tonality /= NB_TBANDS-NB_TONAL_SKIP_BANDS; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 305 | frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 306 | tonal->prev_tonality = frame_tonality; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 307 | info->boost_amount[0] -= frame_tonality+.2; |
| 308 | info->boost_amount[1] -= frame_tonality+.2; |
| 309 | if (band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15 |
| 310 | || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15) |
| 311 | info->boost_amount[0]=0; |
| 312 | if (band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15 |
| 313 | || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15) |
| 314 | info->boost_amount[1]=0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 315 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 316 | slope /= 8*8; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 317 | info->tonality_slope = slope; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 318 | |
| 319 | tonal->E_count = (tonal->E_count+1)%NB_FRAMES; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 320 | tonal->count++; |
| 321 | info->tonality = frame_tonality; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 322 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 323 | for (i=0;i<4;i++) |
Jean-Marc Valin | ac3ac4a | 2011-11-21 14:35:41 -0500 | [diff] [blame] | 324 | features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 325 | |
Jean-Marc Valin | cd213ea | 2011-11-21 21:57:10 -0500 | [diff] [blame] | 326 | for (i=0;i<4;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 327 | tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; |
| 328 | |
| 329 | for (i=0;i<4;i++) |
| 330 | features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]); |
| 331 | for (i=0;i<3;i++) |
| 332 | features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8]; |
Jean-Marc Valin | cd213ea | 2011-11-21 21:57:10 -0500 | [diff] [blame] | 333 | |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 334 | if (tonal->count > 5) |
| 335 | { |
| 336 | for (i=0;i<9;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 337 | tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 338 | } |
| 339 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 340 | for (i=0;i<8;i++) |
| 341 | { |
| 342 | tonal->mem[i+24] = tonal->mem[i+16]; |
| 343 | tonal->mem[i+16] = tonal->mem[i+8]; |
| 344 | tonal->mem[i+8] = tonal->mem[i]; |
| 345 | tonal->mem[i] = BFCC[i]; |
| 346 | } |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 347 | for (i=0;i<9;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 348 | features[11+i] = sqrt(tonal->std[i]); |
| 349 | features[20] = info->tonality; |
| 350 | features[21] = info->activity; |
| 351 | features[22] = frame_stationarity; |
| 352 | features[23] = info->tonality_slope; |
| 353 | features[24] = tonal->lowECount; |
| 354 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 355 | #ifndef FIXED_POINT |
| 356 | mlp_process(&net, features, &frame_prob); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 357 | /* Adds a "probability dead zone", with a cap on certainty */ |
| 358 | frame_prob = .90*frame_prob*frame_prob*frame_prob; |
| 359 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 360 | frame_prob = .5*(frame_prob+1); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 361 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 362 | /*printf("%f\n", frame_prob);*/ |
| 363 | { |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 364 | float tau, beta; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 365 | float p0, p1; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 366 | float max_certainty; |
| 367 | /* One transition every 3 minutes */ |
| 368 | tau = .00005; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 369 | beta = .1; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 370 | max_certainty = 1.f/(10+1*tonal->last_transition); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 371 | p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; |
| 372 | p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 373 | p0 *= pow(1-frame_prob, beta); |
| 374 | p1 *= pow(frame_prob, beta); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 375 | tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1))); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 376 | info->music_prob = tonal->music_prob; |
| 377 | /*printf("%f %f\n", frame_prob, info->music_prob);*/ |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 378 | } |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 379 | if (tonal->last_music != (tonal->music_prob>.5)) |
| 380 | tonal->last_transition=0; |
| 381 | tonal->last_music = tonal->music_prob>.5; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 382 | #else |
| 383 | info->music_prob = 0; |
| 384 | #endif |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 385 | /*for (i=0;i<25;i++) |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 386 | printf("%f ", features[i]); |
| 387 | printf("\n");*/ |
| 388 | |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 389 | /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */ |
| 390 | if (bandwidth == NB_TBANDS-1 || tonal->count<100) |
| 391 | { |
| 392 | tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND; |
| 393 | } else { |
| 394 | int close_enough = 0; |
| 395 | if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1) |
| 396 | close_enough=1; |
| 397 | if (bandwidth<=11 || (bandwidth==12 && close_enough)) |
| 398 | tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND; |
| 399 | else if (bandwidth<=13) |
| 400 | tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; |
| 401 | else if (bandwidth<=15 || (bandwidth==16 && close_enough)) |
| 402 | tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND; |
| 403 | } |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 404 | info->valid = 1; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 405 | } |