Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 1 | /* Copyright (c) 2011 Xiph.Org Foundation |
| 2 | Written by Jean-Marc Valin */ |
| 3 | /* |
| 4 | Redistribution and use in source and binary forms, with or without |
| 5 | modification, are permitted provided that the following conditions |
| 6 | are met: |
| 7 | |
| 8 | - Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | |
| 11 | - Redistributions in binary form must reproduce the above copyright |
| 12 | notice, this list of conditions and the following disclaimer in the |
| 13 | documentation and/or other materials provided with the distribution. |
| 14 | |
| 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #ifdef HAVE_CONFIG_H |
| 29 | #include "config.h" |
| 30 | #endif |
| 31 | |
| 32 | #include "kiss_fft.h" |
| 33 | #include "celt.h" |
| 34 | #include "modes.h" |
| 35 | #include "arch.h" |
| 36 | #include "quant_bands.h" |
| 37 | #include <stdio.h> |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 38 | #ifndef FIXED_POINT |
| 39 | #include "mlp.c" |
| 40 | #include "mlp_data.c" |
| 41 | #endif |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 42 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 43 | #ifndef M_PI |
| 44 | #define M_PI 3.141592653 |
| 45 | #endif |
| 46 | |
| 47 | float dct_table[128] = { |
| 48 | 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, |
| 49 | 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, |
| 50 | 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654, |
| 51 | -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851, |
| 52 | 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760, |
| 53 | -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760, |
| 54 | 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631, |
| 55 | 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330, |
| 56 | 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, |
| 57 | 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, |
| 58 | 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664, |
| 59 | -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806, |
| 60 | 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969, |
| 61 | -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969, |
| 62 | 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292, |
| 63 | 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300, |
| 64 | }; |
| 65 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 66 | #define NB_FRAMES 8 |
| 67 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 68 | #define NB_TBANDS 18 |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 69 | static const int tbands[NB_TBANDS+1] = { |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 70 | 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 |
| 71 | }; |
| 72 | |
| 73 | static const float tweight[NB_TBANDS+1] = { |
| 74 | .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 75 | }; |
| 76 | |
Jean-Marc Valin | 70d90d1 | 2011-11-28 14:17:47 -0500 | [diff] [blame] | 77 | #define NB_TONAL_SKIP_BANDS 0 |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 78 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 79 | typedef struct { |
| 80 | float angle[240]; |
| 81 | float d_angle[240]; |
| 82 | float d2_angle[240]; |
| 83 | float prev_band_tonality[NB_TBANDS]; |
| 84 | float prev_tonality; |
| 85 | float E[NB_FRAMES][NB_TBANDS]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 86 | float lowE[NB_TBANDS], highE[NB_TBANDS]; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 87 | float meanE[NB_TBANDS], meanRE[NB_TBANDS]; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 88 | float mem[32]; |
Jean-Marc Valin | ac3ac4a | 2011-11-21 14:35:41 -0500 | [diff] [blame] | 89 | float cmean[8]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 90 | float std[9]; |
| 91 | float music_prob; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 92 | float Etracker; |
| 93 | float lowECount; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 94 | int E_count; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 95 | int last_music; |
| 96 | int last_transition; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 97 | int count; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 98 | int opus_bandwidth; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 99 | } TonalityAnalysisState; |
| 100 | |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 101 | void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 102 | { |
| 103 | int i, b; |
| 104 | const CELTMode *mode; |
| 105 | const kiss_fft_state *kfft; |
| 106 | kiss_fft_cpx in[480], out[480]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 107 | int N = 480, N2=240; |
| 108 | float * restrict A = tonal->angle; |
| 109 | float * restrict dA = tonal->d_angle; |
| 110 | float * restrict d2A = tonal->d2_angle; |
| 111 | float tonality[240]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 112 | float noisiness[240]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 113 | float band_tonality[NB_TBANDS]; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 114 | float logE[NB_TBANDS]; |
| 115 | float BFCC[8]; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 116 | float features[100]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 117 | float frame_tonality; |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 118 | float max_frame_tonality; |
| 119 | float tw_sum=0; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 120 | float frame_noisiness; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 121 | const float pi4 = M_PI*M_PI*M_PI*M_PI; |
| 122 | float slope=0; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 123 | float frame_stationarity; |
| 124 | float relativeE; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 125 | float frame_prob; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 126 | float alpha, alphaE, alphaE2; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 127 | float frame_loudness; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 128 | float bandwidth_mask; |
| 129 | int bandwidth=0; |
| 130 | float bandE[NB_TBANDS]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 131 | celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); |
| 132 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 133 | tonal->last_transition++; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 134 | alpha = 1.f/IMIN(20, 1+tonal->count); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 135 | alphaE = 1.f/IMIN(50, 1+tonal->count); |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 136 | alphaE2 = 1.f/IMIN(6000, 1+tonal->count); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 137 | |
| 138 | if (tonal->count<4) |
| 139 | tonal->music_prob = .5; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 140 | kfft = mode->mdct.kfft[0]; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 141 | if (C==1) |
| 142 | { |
| 143 | for (i=0;i<N2;i++) |
| 144 | { |
| 145 | float w = .5-.5*cos(M_PI*(i+1)/N2); |
| 146 | in[i].r = MULT16_16(w, x[i]); |
| 147 | in[i].i = MULT16_16(w, x[N-N2+i]); |
| 148 | in[N-i-1].r = MULT16_16(w, x[N-i-1]); |
| 149 | in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]); |
| 150 | } |
| 151 | } else { |
| 152 | for (i=0;i<N2;i++) |
| 153 | { |
| 154 | float w = .5-.5*cos(M_PI*(i+1)/N2); |
| 155 | in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]); |
| 156 | in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]); |
| 157 | in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]); |
| 158 | in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]); |
| 159 | } |
| 160 | } |
| 161 | opus_fft(kfft, in, out); |
| 162 | |
| 163 | for (i=1;i<N2;i++) |
| 164 | { |
| 165 | float X1r, X2r, X1i, X2i; |
| 166 | float angle, d_angle, d2_angle; |
| 167 | float angle2, d_angle2, d2_angle2; |
| 168 | float mod1, mod2, avg_mod; |
| 169 | X1r = out[i].r+out[N-i].r; |
| 170 | X1i = out[i].i-out[N-i].i; |
| 171 | X2r = out[i].i+out[N-i].i; |
| 172 | X2i = out[N-i].r-out[i].r; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 173 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 174 | angle = (.5/M_PI)*atan2(X1i, X1r); |
| 175 | d_angle = angle - A[i]; |
| 176 | d2_angle = d_angle - dA[i]; |
| 177 | |
| 178 | angle2 = (.5/M_PI)*atan2(X2i, X2r); |
| 179 | d_angle2 = angle2 - angle; |
| 180 | d2_angle2 = d_angle2 - d_angle; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 181 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 182 | mod1 = d2_angle - floor(.5+d2_angle); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 183 | noisiness[i] = fabs(mod1); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 184 | mod1 *= mod1; |
| 185 | mod1 *= mod1; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 186 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 187 | mod2 = d2_angle2 - floor(.5+d2_angle2); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 188 | noisiness[i] += fabs(mod2); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 189 | mod2 *= mod2; |
| 190 | mod2 *= mod2; |
| 191 | |
| 192 | avg_mod = .25*(d2A[i]+2*mod1+mod2); |
| 193 | tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015; |
| 194 | |
| 195 | A[i] = angle2; |
| 196 | dA[i] = d_angle2; |
| 197 | d2A[i] = mod2; |
| 198 | } |
| 199 | |
| 200 | frame_tonality = 0; |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 201 | max_frame_tonality = 0; |
| 202 | tw_sum = 0; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 203 | info->activity = 0; |
| 204 | frame_noisiness = 0; |
| 205 | frame_stationarity = 0; |
| 206 | if (!tonal->count) |
| 207 | { |
| 208 | for (b=0;b<NB_TBANDS;b++) |
| 209 | { |
| 210 | tonal->lowE[b] = 1e10; |
| 211 | tonal->highE[b] = -1e10; |
| 212 | } |
| 213 | } |
| 214 | relativeE = 0; |
| 215 | info->boost_amount[0]=info->boost_amount[1]=0; |
| 216 | info->boost_band[0]=info->boost_band[1]=0; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 217 | frame_loudness = 0; |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 218 | bandwidth_mask = 0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 219 | for (b=0;b<NB_TBANDS;b++) |
| 220 | { |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 221 | float E=0, tE=0, nE=0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 222 | float L1, L2; |
| 223 | float stationarity; |
| 224 | for (i=tbands[b];i<tbands[b+1];i++) |
| 225 | { |
| 226 | float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r |
| 227 | + out[i].i*out[i].i + out[N-i].i*out[N-i].i; |
| 228 | E += binE; |
| 229 | tE += binE*tonality[i]; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 230 | nE += binE*2*(.5-noisiness[i]); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 231 | } |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 232 | bandE[b] = E; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 233 | tonal->E[tonal->E_count][b] = E; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 234 | frame_noisiness += nE/(1e-15+E); |
| 235 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 236 | frame_loudness += sqrt(E+1e-10); |
| 237 | /* Add a reasonable noise floor */ |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 238 | tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E; |
| 239 | tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E); |
| 240 | /* 13 dB slope for spreading function */ |
| 241 | bandwidth_mask = MAX32(.05*bandwidth_mask, E); |
| 242 | /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */ |
| 243 | if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask) |
| 244 | bandwidth = b; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 245 | logE[b] = log(E+1e-10); |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 246 | tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01); |
| 247 | tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 248 | if (tonal->highE[b] < tonal->lowE[b]+1) |
| 249 | { |
| 250 | tonal->highE[b]+=.5; |
| 251 | tonal->lowE[b]-=.5; |
| 252 | } |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 253 | relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 254 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 255 | L1=L2=0; |
| 256 | for (i=0;i<NB_FRAMES;i++) |
| 257 | { |
| 258 | L1 += sqrt(tonal->E[i][b]); |
| 259 | L2 += tonal->E[i][b]; |
| 260 | } |
| 261 | |
| 262 | stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2)); |
| 263 | stationarity *= stationarity; |
| 264 | stationarity *= stationarity; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 265 | frame_stationarity += stationarity; |
| 266 | /*band_tonality[b] = tE/(1e-15+E)*/; |
| 267 | band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 268 | //printf("%f ", band_tonality[b]); |
| 269 | #if 1 |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 270 | if (b>=NB_TONAL_SKIP_BANDS) |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 271 | { |
| 272 | frame_tonality += tweight[b]*band_tonality[b]; |
| 273 | tw_sum += tweight[b]; |
| 274 | } |
| 275 | #else |
| 276 | frame_tonality += band_tonality[b]; |
| 277 | if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) |
| 278 | frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; |
| 279 | #endif |
| 280 | max_frame_tonality = MAX16(max_frame_tonality, frame_tonality); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 281 | slope += band_tonality[b]*(b-8); |
Jean-Marc Valin | 70d90d1 | 2011-11-28 14:17:47 -0500 | [diff] [blame] | 282 | /*printf("%f %f ", band_tonality[b], stationarity);*/ |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 283 | if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 284 | { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 285 | if (band_tonality[b] > info->boost_amount[0]) |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 286 | { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 287 | info->boost_amount[1] = info->boost_amount[0]; |
| 288 | info->boost_band[1] = info->boost_band[0]; |
| 289 | info->boost_amount[0] = band_tonality[b]; |
| 290 | info->boost_band[0] = b; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 291 | } else { |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 292 | info->boost_amount[1] = band_tonality[b]; |
| 293 | info->boost_band[1] = b; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 294 | } |
| 295 | } |
| 296 | tonal->prev_band_tonality[b] = band_tonality[b]; |
| 297 | } |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 298 | //printf("\n"); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 299 | frame_loudness = 20*log10(frame_loudness); |
| 300 | tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); |
| 301 | tonal->lowECount *= (1-alphaE); |
| 302 | if (frame_loudness < tonal->Etracker-30) |
| 303 | tonal->lowECount += alphaE; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 304 | |
| 305 | for (i=0;i<8;i++) |
| 306 | { |
| 307 | float sum=0; |
| 308 | for (b=0;b<16;b++) |
| 309 | sum += dct_table[i*16+b]*logE[b]; |
| 310 | BFCC[i] = sum; |
| 311 | } |
| 312 | |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 313 | frame_stationarity /= NB_TBANDS; |
| 314 | relativeE /= NB_TBANDS; |
| 315 | if (tonal->count<10) |
| 316 | relativeE = .5; |
| 317 | frame_noisiness /= NB_TBANDS; |
| 318 | #if 1 |
| 319 | info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; |
| 320 | #else |
| 321 | info->activity = .5*(1+frame_noisiness-frame_stationarity); |
| 322 | #endif |
Jean-Marc Valin | 971b055 | 2011-12-02 16:08:02 -0500 | [diff] [blame] | 323 | frame_tonality = (max_frame_tonality/(tw_sum)); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 324 | frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 325 | tonal->prev_tonality = frame_tonality; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 326 | info->boost_amount[0] -= frame_tonality+.2; |
| 327 | info->boost_amount[1] -= frame_tonality+.2; |
| 328 | if (band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15 |
| 329 | || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15) |
| 330 | info->boost_amount[0]=0; |
| 331 | if (band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15 |
| 332 | || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15) |
| 333 | info->boost_amount[1]=0; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 334 | |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 335 | slope /= 8*8; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 336 | info->tonality_slope = slope; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 337 | |
| 338 | tonal->E_count = (tonal->E_count+1)%NB_FRAMES; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 339 | tonal->count++; |
| 340 | info->tonality = frame_tonality; |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 341 | |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 342 | for (i=0;i<4;i++) |
Jean-Marc Valin | ac3ac4a | 2011-11-21 14:35:41 -0500 | [diff] [blame] | 343 | features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 344 | |
Jean-Marc Valin | cd213ea | 2011-11-21 21:57:10 -0500 | [diff] [blame] | 345 | for (i=0;i<4;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 346 | tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; |
| 347 | |
| 348 | for (i=0;i<4;i++) |
| 349 | features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]); |
| 350 | for (i=0;i<3;i++) |
| 351 | features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8]; |
Jean-Marc Valin | cd213ea | 2011-11-21 21:57:10 -0500 | [diff] [blame] | 352 | |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 353 | if (tonal->count > 5) |
| 354 | { |
| 355 | for (i=0;i<9;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 356 | tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 357 | } |
| 358 | |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 359 | for (i=0;i<8;i++) |
| 360 | { |
| 361 | tonal->mem[i+24] = tonal->mem[i+16]; |
| 362 | tonal->mem[i+16] = tonal->mem[i+8]; |
| 363 | tonal->mem[i+8] = tonal->mem[i]; |
| 364 | tonal->mem[i] = BFCC[i]; |
| 365 | } |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 366 | for (i=0;i<9;i++) |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 367 | features[11+i] = sqrt(tonal->std[i]); |
| 368 | features[20] = info->tonality; |
| 369 | features[21] = info->activity; |
| 370 | features[22] = frame_stationarity; |
| 371 | features[23] = info->tonality_slope; |
| 372 | features[24] = tonal->lowECount; |
| 373 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 374 | #ifndef FIXED_POINT |
| 375 | mlp_process(&net, features, &frame_prob); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 376 | /* Adds a "probability dead zone", with a cap on certainty */ |
| 377 | frame_prob = .90*frame_prob*frame_prob*frame_prob; |
| 378 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 379 | frame_prob = .5*(frame_prob+1); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 380 | |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 381 | /*printf("%f\n", frame_prob);*/ |
| 382 | { |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 383 | float tau, beta; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 384 | float p0, p1; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 385 | float max_certainty; |
| 386 | /* One transition every 3 minutes */ |
| 387 | tau = .00005; |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 388 | beta = .1; |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 389 | max_certainty = 1.f/(10+1*tonal->last_transition); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 390 | p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; |
| 391 | p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 392 | p0 *= pow(1-frame_prob, beta); |
| 393 | p1 *= pow(frame_prob, beta); |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 394 | tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1))); |
Jean-Marc Valin | 747c817 | 2011-11-22 22:44:56 -0500 | [diff] [blame] | 395 | info->music_prob = tonal->music_prob; |
| 396 | /*printf("%f %f\n", frame_prob, info->music_prob);*/ |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 397 | } |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 398 | if (tonal->last_music != (tonal->music_prob>.5)) |
| 399 | tonal->last_transition=0; |
| 400 | tonal->last_music = tonal->music_prob>.5; |
Jean-Marc Valin | 9987a3b | 2011-11-17 19:21:07 +0800 | [diff] [blame] | 401 | #else |
| 402 | info->music_prob = 0; |
| 403 | #endif |
Jean-Marc Valin | 7609b67 | 2011-11-23 13:52:44 -0500 | [diff] [blame] | 404 | /*for (i=0;i<25;i++) |
Jean-Marc Valin | 73eb363 | 2011-11-16 10:47:15 +0800 | [diff] [blame] | 405 | printf("%f ", features[i]); |
| 406 | printf("\n");*/ |
| 407 | |
Jean-Marc Valin | a205457 | 2011-11-25 23:07:46 -0500 | [diff] [blame] | 408 | /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */ |
| 409 | if (bandwidth == NB_TBANDS-1 || tonal->count<100) |
| 410 | { |
| 411 | tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND; |
| 412 | } else { |
| 413 | int close_enough = 0; |
| 414 | if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1) |
| 415 | close_enough=1; |
| 416 | if (bandwidth<=11 || (bandwidth==12 && close_enough)) |
| 417 | tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND; |
| 418 | else if (bandwidth<=13) |
| 419 | tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; |
| 420 | else if (bandwidth<=15 || (bandwidth==16 && close_enough)) |
| 421 | tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND; |
| 422 | } |
Jean-Marc Valin | 2a9fdbc | 2011-12-07 17:39:40 -0500 | [diff] [blame^] | 423 | info->noisiness = frame_noisiness; |
Jean-Marc Valin | e9c353a | 2011-11-14 17:58:29 +0800 | [diff] [blame] | 424 | info->valid = 1; |
Jean-Marc Valin | 1a2e765 | 2011-11-06 23:27:16 -0500 | [diff] [blame] | 425 | } |