Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 1 | /* Copyright (c) 2007-2008 CSIRO |
| 2 | Copyright (c) 2007-2009 Xiph.Org Foundation |
| 3 | Written by Jean-Marc Valin */ |
| 4 | /* |
| 5 | Redistribution and use in source and binary forms, with or without |
| 6 | modification, are permitted provided that the following conditions |
| 7 | are met: |
| 8 | |
| 9 | - Redistributions of source code must retain the above copyright |
| 10 | notice, this list of conditions and the following disclaimer. |
| 11 | |
| 12 | - Redistributions in binary form must reproduce the above copyright |
| 13 | notice, this list of conditions and the following disclaimer in the |
| 14 | documentation and/or other materials provided with the distribution. |
| 15 | |
| 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 20 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #ifdef HAVE_CONFIG_H |
| 30 | #include "config.h" |
| 31 | #endif |
| 32 | |
| 33 | #include "mathops.h" |
| 34 | #include "cwrs.h" |
| 35 | #include "vq.h" |
| 36 | #include "arch.h" |
| 37 | #include "os_support.h" |
| 38 | #include "bands.h" |
| 39 | #include "rate.h" |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 40 | #include "pitch.h" |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 41 | |
#ifndef OVERRIDE_vq_exp_rotation1
/** In-place rotation of a vector by a sequence of 2-D Givens "butterflies".
    Each step rotates the pair (Xptr[0], Xptr[stride]) by the angle whose
    cosine/sine are (c, s) in Q15.  A forward pass over the vector is followed
    by a backward pass, spreading the rotation across all coefficients.
    @param X      vector to rotate (modified in place)
    @param len    number of elements in X
    @param stride distance between the two elements of each rotated pair
    @param c      Q15 cosine of the rotation angle
    @param s      Q15 sine of the rotation angle */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   /* Negated sine, used for the inverse-direction element of each pair. */
   ms = NEG16(s);
   /* Forward pass: rotate (x1, x2) pairs walking up the vector. */
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      /* [x2'; x1'] = [c s; -s c] * [x2; x1], with Q15 rounding shifts. */
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Backward pass: same butterfly applied walking down the vector. */
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
}
#endif /* OVERRIDE_vq_exp_rotation1 */
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 69 | |
/** Applies (dir=1) or undoes (dir=-1) a spreading rotation to X, used to
    avoid sparse-pulse artifacts after PVQ quantisation.  The rotation angle
    shrinks as the pulse count K grows relative to the length, and is skipped
    entirely when there are enough pulses (2*K>=len) or spreading is disabled.
    @param X      vector to rotate in place (len elements)
    @param len    total length of X
    @param dir    +1 to apply the rotation, -1 to apply the inverse
    @param stride number of interleaved sub-vectors (B blocks)
    @param K      number of pulses
    @param spread spreading amount (SPREAD_NONE..SPREAD_AGGRESSIVE) */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   /* Spreading factors indexed by (spread-1); larger factor => smaller angle. */
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   /* gain = len/(len+factor*K) in Q15; theta = gain^2/2. */
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   /* Rotate each of the 'stride' interleaved blocks independently.  For the
      inverse direction the two rotation stages are applied in reverse order
      with negated sine terms, exactly undoing the forward case. */
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}
| 114 | |
/** Takes the pitch vector and the decoded residual vector, computes the gain
    that will give ||p+g*y||=1 and mixes the residual with the pitch.
    @param iy   integer pulse vector (N entries)
    @param X    output: normalised signal, scaled copy of iy (N entries)
    @param N    number of samples in the band
    @param Ryy  energy of the pulse vector, sum(iy[i]^2)
    @param gain extra gain to fold into the normalisation (Q15) */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val16 g;

#ifdef FIXED_POINT
   /* Normalisation shift so that t lands in the range celt_rsqrt_norm expects. */
   k = celt_ilog2(Ryy)>>1;
#endif
   /* NOTE(review): `k` is only declared/assigned under FIXED_POINT but appears
      textually below; presumably in float builds VSHR32/PSHR32/EXTRACT16 are
      pass-through macros that discard the shift argument, so `k` never
      survives preprocessing — confirm against arch.h. */
   t = VSHR32(Ryy, 2*(k-7));
   /* g = gain / sqrt(Ryy), i.e. the factor giving the pulse vector unit norm. */
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   i=0;
   do
      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
   while (++i < N);
}
| 138 | |
/** Builds a per-block "collapse" bitmask from the pulse vector: bit b of the
    result is set iff interleaved block b of iy contains at least one nonzero
    pulse.  With a single block (B<=1) every band trivially has energy, so the
    mask is just 1.
    @param iy pulse vector (N entries, B interleaved blocks)
    @param N  total number of entries
    @param B  number of blocks
    @return   B-bit mask of blocks that received pulses */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned mask;
   int blk_len;
   int blk;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   blk_len = celt_udiv(N, B);
   mask = 0;
   blk=0; do {
      int k;
      unsigned any=0;
      /* OR together all pulses of this block; nonzero means "not collapsed". */
      k=0; do {
         any |= iy[blk*blk_len+k];
      } while (++k<blk_len);
      if (any)
         mask |= 1u<<blk;
   } while (++blk<B);
   return mask;
}
| 160 | |
/** Algebraic pulse-vector quantisation (PVQ search + encoding).  Finds the
    integer vector iy with sum(|iy|)==K that best approximates the direction
    of X, encodes it into the range coder, and returns the collapse mask of
    blocks that received pulses.  X is sign-folded and rotated in place; under
    RESYNTH it is additionally overwritten with the decoded (synthesised)
    unit-norm signal.
    @param X      residual to quantise (N entries, modified in place)
    @param N      number of samples in the band (must be > 1)
    @param K      number of pulses to allocate (must be > 0)
    @param spread spreading amount passed to exp_rotation()
    @param B      number of interleaved blocks
    @param enc    range encoder the pulse vector is written to
    @return       collapse mask (see extract_collapse_mask()) */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
   , opus_val16 gain
#endif
   )
{
   VARDECL(celt_norm, y);
   VARDECL(int, iy);
   VARDECL(opus_val16, signx);
   int i, j;
   opus_val16 s;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;      /* running correlation <X, y> */
   opus_val16 yy;      /* running energy <y, y> (tracked with y[] doubled) */
   unsigned collapse_mask;
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   ALLOC(y, N, celt_norm);
   ALLOC(iy, N, int);
   ALLOC(signx, N, opus_val16);

   /* Spread the signal so pulses don't end up concentrated in a few bins. */
   exp_rotation(X, N, 1, B, K, spread);

   /* Get rid of the sign: fold X into the positive orthant, remembering the
      signs so they can be restored (and applied to iy) at the end. */
   sum = 0;
   j=0; do {
      if (X[j]>0)
         signx[j]=1;
      else {
         signx[j]=-1;
         X[j]=-X[j];
      }
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid: allocate roughly K-1
      pulses in one pass by scaling X to L1 norm K-1, leaving the remainder
      for the exact greedy search below. */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
      /* rcp = (K-1)/sum: scale factor projecting X onto the L1 ball. */
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y[] is kept doubled so the per-candidate energy update in the
            greedy loop is a single addition. */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy search: place the remaining pulses one at a time, each in the
      position that maximises the (squared) normalised correlation. */
   s = 1;
   for (i=0;i<pulsesLeft;i++)
   {
      int best_id;
      opus_val32 best_num = -VERY_LARGE16;
      opus_val16 best_den = 0;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      /* Headroom shift for Rxy, growing with the number of pulses placed. */
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);
      j=0;
      do {
         opus_val16 Rxy, Ryy;
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2*s;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
   encode_pulses(iy, N, K, enc);

#ifdef RESYNTH
   /* Encoder-side resynthesis: reproduce exactly what the decoder will see. */
   normalise_residual(iy, X, N, yy, gain);
   exp_rotation(X, N, -1, B, K, spread);
#endif

   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
| 326 | |
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.
    Mirrors alg_quant(): reads the pulse vector from the range decoder,
    normalises it to unit energy (times gain), and applies the inverse
    spreading rotation.
    @param X      output: decoded normalised signal (N entries)
    @param N      number of samples in the band (must be > 1)
    @param K      number of pulses (must be > 0)
    @param spread spreading amount passed to exp_rotation()
    @param B      number of interleaved blocks
    @param dec    range decoder to read the pulse vector from
    @param gain   gain to fold into the normalisation (Q15)
    @return       collapse mask (see extract_collapse_mask()) */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   /* decode_pulses() returns the energy of the decoded pulse vector. */
   Ryy = decode_pulses(iy, N, K, dec);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
| 347 | |
#ifndef OVERRIDE_renormalise_vector
/** Rescales X in place so that its energy equals gain^2, i.e.
    X <- X * gain / sqrt(<X,X> + EPSILON).  EPSILON keeps the all-zero
    vector from dividing by zero.
    @param X    vector to renormalise in place (N entries)
    @param N    number of entries
    @param gain target norm (Q15)
    @param arch architecture id forwarded to the inner-product kernel */
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
   /* Normalisation shift so that t lands in the range celt_rsqrt_norm expects. */
   k = celt_ilog2(E)>>1;
#endif
   /* NOTE(review): `k` is only defined under FIXED_POINT; presumably
      VSHR32/PSHR32/EXTRACT16 discard the shift argument in float builds so
      `k` never survives preprocessing — confirm against arch.h. */
   t = VSHR32(E, 2*(k-7));
   /* g = gain / sqrt(E). */
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
      xptr++;
   }
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 375 | |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 376 | int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 377 | { |
| 378 | int i; |
| 379 | int itheta; |
| 380 | opus_val16 mid, side; |
| 381 | opus_val32 Emid, Eside; |
| 382 | |
| 383 | Emid = Eside = EPSILON; |
| 384 | if (stereo) |
| 385 | { |
| 386 | for (i=0;i<N;i++) |
| 387 | { |
| 388 | celt_norm m, s; |
| 389 | m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 390 | s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 391 | Emid = MAC16_16(Emid, m, m); |
| 392 | Eside = MAC16_16(Eside, s, s); |
| 393 | } |
| 394 | } else { |
flim | c91ee5b | 2016-01-26 14:33:44 +0100 | [diff] [blame] | 395 | Emid += celt_inner_prod(X, X, N, arch); |
| 396 | Eside += celt_inner_prod(Y, Y, N, arch); |
Vignesh Venkatasubramanian | 2bd8b54 | 2014-02-20 10:50:35 -0800 | [diff] [blame] | 397 | } |
| 398 | mid = celt_sqrt(Emid); |
| 399 | side = celt_sqrt(Eside); |
| 400 | #ifdef FIXED_POINT |
| 401 | /* 0.63662 = 2/pi */ |
| 402 | itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); |
| 403 | #else |
| 404 | itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); |
| 405 | #endif |
| 406 | |
| 407 | return itheta; |
| 408 | } |