blob: 667796ebac20099c5483c10873628a7efd017d72 [file] [log] [blame]
Jean-Marc Valin35a1f882008-03-26 10:34:23 +11001/* (C) 2007-2008 Jean-Marc Valin, CSIRO
Jean-Marc Valin41af4212007-11-30 18:35:37 +11002*/
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*/
31
Jean-Marc Valin02fa9132008-02-20 12:09:29 +110032#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
Jean-Marc Valin3ca9b1d2008-02-27 23:50:31 +110036#include "mathops.h"
Jean-Marc Valin29ccab82007-12-06 15:39:38 +110037#include "cwrs.h"
Jean-Marc Valin9cace642007-12-06 17:44:09 +110038#include "vq.h"
Jean-Marc Valin9a0bba12008-02-20 14:08:50 +110039#include "arch.h"
Jean-Marc Valinb60340f2008-02-26 15:41:51 +110040#include "os_support.h"
Jean-Marc Valin41af4212007-11-30 18:35:37 +110041
Jean-Marc Valin35a1f882008-03-26 10:34:23 +110042/** Takes the pitch vector and the decoded residual vector, computes the gain
43 that will give ||p+g*y||=1 and mixes the residual with the pitch. */
Jean-Marc Valin5de868c2008-03-25 22:38:58 +110044static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X, int N, int K, const celt_norm_t * restrict P)
Jean-Marc Valind4018c32008-02-27 10:09:48 +110045{
46 int i;
Jean-Marc Valinb50c5412008-02-27 17:05:43 +110047 celt_word32_t Ryp, Ryy, Rpp;
Jean-Marc Valina847b772008-02-27 17:46:49 +110048 celt_word32_t g;
Jean-Marc Valin31b79d12008-03-12 17:17:23 +110049 VARDECL(celt_norm_t, y);
Jean-Marc Valind9de5932008-03-05 08:11:57 +110050#ifdef FIXED_POINT
51 int yshift;
52#endif
Jean-Marc Valin8600f692008-02-29 15:14:12 +110053 SAVE_STACK;
Jean-Marc Valind17edd32008-02-27 16:52:30 +110054#ifdef FIXED_POINT
Jean-Marc Valin98c86c72008-03-27 08:40:45 +110055 yshift = 13-celt_ilog2(K);
Jean-Marc Valind17edd32008-02-27 16:52:30 +110056#endif
57 ALLOC(y, N, celt_norm_t);
Jean-Marc Valind4018c32008-02-27 10:09:48 +110058
59 /*for (i=0;i<N;i++)
60 printf ("%d ", iy[i]);*/
Jean-Marc Valinb50c5412008-02-27 17:05:43 +110061 Rpp = 0;
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100062 i=0;
63 do {
Jean-Marc Valinb50c5412008-02-27 17:05:43 +110064 Rpp = MAC16_16(Rpp,P[i],P[i]);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +110065 y[i] = SHL16(iy[i],yshift);
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100066 } while (++i < N);
67
Jean-Marc Valind4018c32008-02-27 10:09:48 +110068 Ryp = 0;
Jean-Marc Valinb50c5412008-02-27 17:05:43 +110069 Ryy = 0;
Jean-Marc Valindf7ab432008-03-26 18:03:22 +110070 /* If this doesn't generate a dual MAC (on supported archs), fire the compiler guy */
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100071 i=0;
72 do {
Jean-Marc Valindf7ab432008-03-26 18:03:22 +110073 Ryp = MAC16_16(Ryp, y[i], P[i]);
74 Ryy = MAC16_16(Ryy, y[i], y[i]);
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100075 } while (++i < N);
76
Jean-Marc Valin1ca07222008-02-27 17:23:04 +110077 /* g = (sqrt(Ryp^2 + Ryy - Rpp*Ryy)-Ryp)/Ryy */
Jean-Marc Valin9d5b4a62008-03-13 11:36:45 +110078 g = MULT16_32_Q15(
Jean-Marc Valinf5b05872008-03-21 10:46:17 +110079 celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -
80 MULT16_16(ROUND16(Ryy,14),ROUND16(Rpp,14)))
81 - ROUND16(Ryp,14),
Jean-Marc Valin9d5b4a62008-03-13 11:36:45 +110082 celt_rcp(SHR32(Ryy,9)));
Jean-Marc Valind4018c32008-02-27 10:09:48 +110083
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100084 i=0;
85 do
Jean-Marc Valinf5b05872008-03-21 10:46:17 +110086 X[i] = P[i] + ROUND16(MULT16_16(y[i], g),11);
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100087 while (++i < N);
88
Jean-Marc Valin8600f692008-02-29 15:14:12 +110089 RESTORE_STACK;
Jean-Marc Valind4018c32008-02-27 10:09:48 +110090}
91
Jean-Marc Valin41af4212007-11-30 18:35:37 +110092
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +110093void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *P, ec_enc *enc)
Jean-Marc Valin41af4212007-11-30 18:35:37 +110094{
Jean-Marc Valin44c63352008-03-25 21:28:40 +110095 VARDECL(celt_norm_t, y);
96 VARDECL(int, iy);
Jean-Marc Valin49134382008-03-25 16:07:05 +110097 VARDECL(int, signx);
Jean-Marc Valin6ea8bae2008-04-15 08:01:33 +100098 int j, is;
Jean-Marc Valin44c63352008-03-25 21:28:40 +110099 celt_word16_t s;
Jean-Marc Valin846d4e22008-02-12 13:48:48 +1100100 int pulsesLeft;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100101 celt_word32_t sum;
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100102 celt_word32_t xy, yy, yp;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100103 celt_word16_t Rpp;
Jean-Marc Valinf9584772008-03-27 12:22:44 +1100104 int N_1; /* Inverse of N, in Q14 format (even for float) */
Jean-Marc Valinf675adc2008-02-28 12:15:17 +1100105#ifdef FIXED_POINT
Jean-Marc Valind748cd52008-03-01 07:27:03 +1100106 int yshift;
107#endif
108 SAVE_STACK;
109
110#ifdef FIXED_POINT
Jean-Marc Valin98c86c72008-03-27 08:40:45 +1100111 yshift = 13-celt_ilog2(K);
Jean-Marc Valinf675adc2008-02-28 12:15:17 +1100112#endif
Jean-Marc Valin9d8d9b32008-02-27 16:17:39 +1100113
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100114 ALLOC(y, N, celt_norm_t);
115 ALLOC(iy, N, int);
Jean-Marc Valin49134382008-03-25 16:07:05 +1100116 ALLOC(signx, N, int);
Jean-Marc Valin124d1cd2008-03-28 00:33:04 +1100117 N_1 = 512/N;
Jean-Marc Valin3d152a52008-04-15 07:46:48 +1000118
119 sum = 0;
Jean-Marc Valindff9b7e2008-04-21 11:43:51 +1000120 j=0; do {
Jean-Marc Valin49134382008-03-25 16:07:05 +1100121 if (X[j]>0)
122 signx[j]=1;
123 else
124 signx[j]=-1;
Jean-Marc Valin3d152a52008-04-15 07:46:48 +1000125 iy[j] = 0;
126 y[j] = 0;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100127 sum = MAC16_16(sum, P[j],P[j]);
Jean-Marc Valindff9b7e2008-04-21 11:43:51 +1000128 } while (++j<N);
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100129 Rpp = ROUND16(sum, NORM_SHIFT);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100130
Jean-Marc Valin4ff068e2008-03-15 23:34:39 +1100131 celt_assert2(Rpp<=NORM_SCALING, "Rpp should never have a norm greater than unity");
Jean-Marc Valinb60340f2008-02-26 15:41:51 +1100132
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100133 xy = yy = yp = 0;
Jean-Marc Valin0d587d82008-02-14 21:29:50 +1100134
Jean-Marc Valin846d4e22008-02-12 13:48:48 +1100135 pulsesLeft = K;
136 while (pulsesLeft > 0)
Jean-Marc Valin41af4212007-11-30 18:35:37 +1100137 {
Jean-Marc Valin846d4e22008-02-12 13:48:48 +1100138 int pulsesAtOnce=1;
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100139 int best_id;
Jean-Marc Valined317c92008-04-15 17:31:23 +1000140 celt_word16_t magnitude;
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000141#ifdef FIXED_POINT
142 int rshift;
143#endif
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100144 /* Decide on how many pulses to find at once */
Jean-Marc Valin124d1cd2008-03-28 00:33:04 +1100145 pulsesAtOnce = (pulsesLeft*N_1)>>9; /* pulsesLeft/N */
Jean-Marc Valincab576e2008-02-12 17:21:14 +1100146 if (pulsesAtOnce<1)
147 pulsesAtOnce = 1;
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000148#ifdef FIXED_POINT
149 rshift = yshift+1+celt_ilog2(K-pulsesLeft+pulsesAtOnce);
150#endif
Jean-Marc Valined317c92008-04-15 17:31:23 +1000151 magnitude = SHL16(pulsesAtOnce, yshift);
Jean-Marc Valin846d4e22008-02-12 13:48:48 +1100152
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100153 best_id = 0;
Jean-Marc Valined317c92008-04-15 17:31:23 +1000154 /* The squared magnitude term gets added anyway, so we might as well
155 add it outside the loop */
156 yy = ADD32(yy, MULT16_16(magnitude,magnitude));
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100157 /* Choose between fast and accurate strategy depending on where we are in the search */
158 if (pulsesLeft>1)
Jean-Marc Valin41af4212007-11-30 18:35:37 +1100159 {
Jean-Marc Valined317c92008-04-15 17:31:23 +1000160 /* This should ensure that anything we can process will have a better score */
161 celt_word32_t best_num = -VERY_LARGE16;
162 celt_word16_t best_den = 0;
Jean-Marc Valinfed97d52008-03-26 21:31:56 +1100163 j=0;
164 do {
Jean-Marc Valined317c92008-04-15 17:31:23 +1000165 celt_word16_t Rxy, Ryy;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100166 /* Select sign based on X[j] alone */
Jean-Marc Valined317c92008-04-15 17:31:23 +1000167 s = signx[j]*magnitude;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100168 /* Temporary sums of the new pulse(s) */
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000169 Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));
Jean-Marc Valined317c92008-04-15 17:31:23 +1000170 /* We're multiplying y[j] by two so we don't have to do it here */
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000171 Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100172
Jean-Marc Valined317c92008-04-15 17:31:23 +1000173 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
174 Rxy is positive because the sign is pre-computed) */
175 Rxy = MULT16_16_Q15(Rxy,Rxy);
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100176 /* The idea is to check for num/den >= best_num/best_den, but that way
177 we can do it without any division */
Jean-Marc Valined317c92008-04-15 17:31:23 +1000178 /* OPT: Make sure to use conditional moves here */
179 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100180 {
Jean-Marc Valined317c92008-04-15 17:31:23 +1000181 best_den = Ryy;
182 best_num = Rxy;
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100183 best_id = j;
184 }
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000185 } while (++j<N);
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100186 } else {
Jean-Marc Valin684fb282008-04-15 18:06:59 +1000187 celt_word16_t g;
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000188 celt_word16_t best_num = -VERY_LARGE16;
189 celt_word16_t best_den = 0;
190 j=0;
191 do {
Jean-Marc Valind5683032008-04-15 18:04:33 +1000192 celt_word16_t Rxy, Ryy, Ryp;
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000193 celt_word16_t num;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100194 /* Select sign based on X[j] alone */
Jean-Marc Valined317c92008-04-15 17:31:23 +1000195 s = signx[j]*magnitude;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100196 /* Temporary sums of the new pulse(s) */
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000197 Rxy = ROUND16(xy + MULT16_16(s,X[j]), 14);
Jean-Marc Valined317c92008-04-15 17:31:23 +1000198 /* We're multiplying y[j] by two so we don't have to do it here */
Jean-Marc Valind5683032008-04-15 18:04:33 +1000199 Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);
200 Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100201
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000202 /* Compute the gain such that ||p + g*y|| = 1
203 ...but instead, we compute g*Ryy to avoid dividing */
204 g = celt_psqrt(MULT16_16(Ryp,Ryp) + MULT16_16(Ryy,QCONST16(1.f,14)-Rpp)) - Ryp;
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100205 /* Knowing that gain, what's the error: (x-g*y)^2
206 (result is negated and we discard x^2 because it's constant) */
Jean-Marc Valind5683032008-04-15 18:04:33 +1000207 /* score = 2*g*Rxy - g*g*Ryy;*/
208#ifdef FIXED_POINT
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000209 /* No need to multiply Rxy by 2 because we did it earlier */
Jean-Marc Valin0bc5f7f2008-04-20 17:16:18 +1000210 num = MULT16_16_Q15(ADD16(SUB16(Rxy,g),Rxy),g);
Jean-Marc Valind5683032008-04-15 18:04:33 +1000211#else
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000212 num = g*(2*Rxy-g);
Jean-Marc Valind5683032008-04-15 18:04:33 +1000213#endif
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000214 if (MULT16_16(best_den, num) > MULT16_16(Ryy, best_num))
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100215 {
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000216 best_den = Ryy;
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100217 best_num = num;
218 best_id = j;
Jean-Marc Valin96069fd2008-04-15 21:14:18 +1000219 }
220 } while (++j<N);
Jean-Marc Valin41af4212007-11-30 18:35:37 +1100221 }
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100222
Jean-Marc Valin35a1f882008-03-26 10:34:23 +1100223 j = best_id;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100224 is = signx[j]*pulsesAtOnce;
225 s = SHL16(is, yshift);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100226
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100227 /* Updating the sums of the new pulse(s) */
228 xy = xy + MULT16_16(s,X[j]);
Jean-Marc Valined317c92008-04-15 17:31:23 +1000229 /* We're multiplying y[j] by two so we don't have to do it here */
230 yy = yy + MULT16_16(s,y[j]);
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100231 yp = yp + MULT16_16(s, P[j]);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100232
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100233 /* Only now that we've made the final choice, update y/iy */
Jean-Marc Valined317c92008-04-15 17:31:23 +1000234 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
235 y[j] += 2*s;
Jean-Marc Valin44c63352008-03-25 21:28:40 +1100236 iy[j] += is;
Jean-Marc Valin846d4e22008-02-12 13:48:48 +1100237 pulsesLeft -= pulsesAtOnce;
Jean-Marc Valin41af4212007-11-30 18:35:37 +1100238 }
239
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100240 encode_pulses(iy, N, K, enc);
Jean-Marc Valin5fa59952008-02-14 13:50:44 +1100241
Jean-Marc Valina4833ff2008-01-10 15:34:00 +1100242 /* Recompute the gain in one pass to reduce the encoder-decoder mismatch
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100243 due to the recursive computation used in quantisation. */
244 mix_pitch_and_residual(iy, X, N, K, P);
Jean-Marc Valin8600f692008-02-29 15:14:12 +1100245 RESTORE_STACK;
Jean-Marc Valin41af4212007-11-30 18:35:37 +1100246}
247
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100248
Jean-Marc Valin879fbfd2008-02-20 17:17:13 +1100249/** Decode pulse vector and combine the result with the pitch vector to produce
250 the final normalised signal in the current band. */
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100251void alg_unquant(celt_norm_t *X, int N, int K, celt_norm_t *P, ec_dec *dec)
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100252{
Jean-Marc Valin31b79d12008-03-12 17:17:23 +1100253 VARDECL(int, iy);
Jean-Marc Valin8600f692008-02-29 15:14:12 +1100254 SAVE_STACK;
Jean-Marc Valin9a0bba12008-02-20 14:08:50 +1100255 ALLOC(iy, N, int);
Jean-Marc Valin5fa59952008-02-14 13:50:44 +1100256 decode_pulses(iy, N, K, dec);
Jean-Marc Valinbd718ba2008-03-25 14:15:41 +1100257 mix_pitch_and_residual(iy, X, N, K, P);
Jean-Marc Valin8600f692008-02-29 15:14:12 +1100258 RESTORE_STACK;
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100259}
260
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100261#ifdef FIXED_POINT
262static const celt_word16_t pg[11] = {32767, 24576, 21299, 19661, 19661, 19661, 18022, 18022, 16384, 16384, 16384};
263#else
Jean-Marc Valin3e650972008-03-07 17:38:58 +1100264static const celt_word16_t pg[11] = {1.f, .75f, .65f, 0.6f, 0.6f, .6f, .55f, .55f, .5f, .5f, .5f};
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100265#endif
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100266
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100267#define MAX_INTRA 32
268#define LOG_MAX_INTRA 5
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000269
270void intra_prediction(celt_norm_t * restrict x, celt_mask_t *W, int N, int K, celt_norm_t *Y, celt_norm_t * restrict P, int C, int N0, ec_enc *enc)
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100271{
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000272 int i,j,c;
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100273 int best=0;
Jean-Marc Valin0a864642008-04-16 10:29:01 +1000274 celt_word16_t best_num=-VERY_LARGE16;
Jean-Marc Valin89c5fd12008-03-26 12:16:00 +1100275 celt_word16_t best_den=0;
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100276 celt_word16_t s = 1;
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100277 int sign;
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100278 celt_word32_t E;
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100279 celt_word16_t pred_gain;
Jean-Marc Valine28f25f2008-03-27 14:18:28 +1100280 int max_pos = N0-N;
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000281 celt_word32_t yy=0;
Jean-Marc Valin3956de92008-04-12 06:55:39 +1000282 VARDECL(celt_norm_t, Xr);
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000283 SAVE_STACK;
284
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000285 ALLOC(Xr, C*N, celt_norm_t);
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000286
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100287 if (max_pos > MAX_INTRA)
288 max_pos = MAX_INTRA;
Jean-Marc Valin8f0f4b92008-02-11 13:52:44 +1100289
Jean-Marc Valinc8e3b672008-04-10 12:21:26 +1000290 /* Reverse the samples of x without reversing the channels */
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000291 for (c=0;c<C;c++)
Jean-Marc Valin8c4877b2008-04-21 12:15:16 +1000292 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000293 celt_norm_t * restrict Xrp = &Xr[C*N-C+c];
Jean-Marc Valin8c4877b2008-04-21 12:15:16 +1000294 const celt_norm_t * restrict xp = &x[c];
295 j=0; do {
296 *Xrp = *xp;
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000297 Xrp -= C;
298 xp += C;
Jean-Marc Valin8c4877b2008-04-21 12:15:16 +1000299 } while (++j<N); /* Promises we loop at least once */
300 }
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000301 /* Compute yy for i=0 */
302 j=0;
303 do {
304 yy = MAC16_16(yy, Y[j], Y[j]);
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000305 } while (++j<C*N); /* Promises we loop at least once */
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000306
Jean-Marc Valinc8e3b672008-04-10 12:21:26 +1000307 for (i=0;i<max_pos;i++)
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100308 {
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000309 celt_word32_t xy=0;
310 celt_word16_t num, den;
Jean-Marc Valinc8e3b672008-04-10 12:21:26 +1000311 const celt_word16_t * restrict xp = Xr;
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000312 const celt_word16_t * restrict yp = Y+C*i;
Jean-Marc Valinfed97d52008-03-26 21:31:56 +1100313 j=0;
314 do {
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000315 xy = MAC16_16(xy, *xp++, *yp++);
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000316 } while (++j<C*N); /* Promises we loop at least once */
Jean-Marc Valin89c5fd12008-03-26 12:16:00 +1100317 /* Using xy^2/yy as the score but without having to do the division */
Jean-Marc Valin0a864642008-04-16 10:29:01 +1000318 num = MULT16_16_Q15(ROUND16(xy,14),ROUND16(xy,14));
Jean-Marc Valin89c5fd12008-03-26 12:16:00 +1100319 den = ROUND16(yy,14);
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100320 /* If you're really desperate for speed, just use xy as the score */
Jean-Marc Valinfed97d52008-03-26 21:31:56 +1100321 /* OPT: Make sure to use a conditional move here */
Jean-Marc Valin0a864642008-04-16 10:29:01 +1000322 if (MULT16_16(best_den, num) > MULT16_16(den, best_num))
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100323 {
Jean-Marc Valin89c5fd12008-03-26 12:16:00 +1100324 best_num = num;
325 best_den = den;
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100326 best = i;
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100327 /* Store xy as the sign. We'll normalise it to +/- 1 later. */
328 s = ROUND16(xy,14);
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100329 }
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000330 /* Update yy for the next iteration */
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000331 yp = Y+C*i;
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000332 j=0;
333 do {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000334 yy = yy - MULT16_16(*yp, *yp) + MULT16_16(yp[C*N], yp[C*N]);
Jean-Marc Valina66c3202008-04-16 10:43:32 +1000335 yp++;
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000336 } while (++j<C);
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100337 }
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100338 if (s<0)
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100339 {
340 s = -1;
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100341 sign = 1;
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100342 } else {
343 s = 1;
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100344 sign = 0;
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100345 }
Jean-Marc Valina85657b2008-02-20 11:59:30 +1100346 /*printf ("%d %d ", sign, best);*/
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100347 ec_enc_bits(enc,sign,1);
348 if (max_pos == MAX_INTRA)
Jean-Marc Valinc8e3b672008-04-10 12:21:26 +1000349 ec_enc_bits(enc,best,LOG_MAX_INTRA);
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100350 else
Jean-Marc Valinc8e3b672008-04-10 12:21:26 +1000351 ec_enc_uint(enc,best,max_pos);
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100352
Jean-Marc Valina85657b2008-02-20 11:59:30 +1100353 /*printf ("%d %f\n", best, best_score);*/
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100354
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100355 if (K>10)
356 pred_gain = pg[10];
357 else
358 pred_gain = pg[K];
Jean-Marc Valin03892c12008-03-07 17:25:47 +1100359 E = EPSILON;
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000360 for (c=0;c<C;c++)
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100361 {
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000362 for (j=0;j<N;j++)
363 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000364 P[C*j+c] = s*Y[C*best+C*(N-j-1)+c];
365 E = MAC16_16(E, P[C*j+c],P[C*j+c]);
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000366 }
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100367 }
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100368 /*pred_gain = pred_gain/sqrt(E);*/
Jean-Marc Valin23e82b22008-03-24 08:15:40 +1100369 pred_gain = MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000370 for (j=0;j<C*N;j++)
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100371 P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100372 if (K>0)
373 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000374 for (j=0;j<C*N;j++)
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100375 x[j] -= P[j];
376 } else {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000377 for (j=0;j<C*N;j++)
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100378 x[j] = P[j];
379 }
Jean-Marc Valina85657b2008-02-20 11:59:30 +1100380 /*printf ("quant ");*/
381 /*for (j=0;j<N;j++) printf ("%f ", P[j]);*/
Jean-Marc Valin6d3289c2008-04-10 14:43:59 +1000382 RESTORE_STACK;
Jean-Marc Valin4841a0a2007-12-03 13:54:30 +1100383}
Jean-Marc Valinfc08d0a2007-12-07 13:26:15 +1100384
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000385void intra_unquant(celt_norm_t *x, int N, int K, celt_norm_t *Y, celt_norm_t * restrict P, int C, int N0, ec_dec *dec)
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100386{
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000387 int j, c;
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100388 int sign;
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100389 celt_word16_t s;
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100390 int best;
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100391 celt_word32_t E;
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100392 celt_word16_t pred_gain;
Jean-Marc Valine28f25f2008-03-27 14:18:28 +1100393 int max_pos = N0-N;
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100394 if (max_pos > MAX_INTRA)
395 max_pos = MAX_INTRA;
Jean-Marc Valin8f0f4b92008-02-11 13:52:44 +1100396
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100397 sign = ec_dec_bits(dec, 1);
Jean-Marc Valin0aa39032007-12-07 15:09:58 +1100398 if (sign == 0)
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100399 s = 1;
400 else
401 s = -1;
402
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100403 if (max_pos == MAX_INTRA)
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000404 best = C*ec_dec_bits(dec, LOG_MAX_INTRA);
Jean-Marc Valin208ae6e2008-03-25 15:25:08 +1100405 else
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000406 best = C*ec_dec_uint(dec, max_pos);
Jean-Marc Valina85657b2008-02-20 11:59:30 +1100407 /*printf ("%d %d ", sign, best);*/
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100408
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100409 if (K>10)
410 pred_gain = pg[10];
411 else
412 pred_gain = pg[K];
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100413 E = EPSILON;
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000414 for (c=0;c<C;c++)
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100415 {
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000416 for (j=0;j<N;j++)
417 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000418 P[C*j+c] = s*Y[best+C*(N-j-1)+c];
419 E = MAC16_16(E, P[C*j+c],P[C*j+c]);
Jean-Marc Valin381b29c2008-04-10 11:00:51 +1000420 }
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100421 }
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100422 /*pred_gain = pred_gain/sqrt(E);*/
Jean-Marc Valin23e82b22008-03-24 08:15:40 +1100423 pred_gain = MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000424 for (j=0;j<C*N;j++)
Jean-Marc Valin9455d1b2008-03-07 17:17:37 +1100425 P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100426 if (K==0)
427 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000428 for (j=0;j<C*N;j++)
Jean-Marc Valin0d227d82007-12-31 16:12:12 +1100429 x[j] = P[j];
Jean-Marc Valin6e9058a2007-12-07 14:59:06 +1100430 }
431}
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100432
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000433void intra_fold(celt_norm_t *x, int N, celt_norm_t *Y, celt_norm_t * restrict P, int C, int N0, int Nmax)
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100434{
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100435 int i, j;
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100436 celt_word32_t E;
Jean-Marc Valinec9b6df2008-03-07 17:05:47 +1100437 celt_word16_t g;
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100438
Jean-Marc Valinec9b6df2008-03-07 17:05:47 +1100439 E = EPSILON;
Jean-Marc Valina536f772008-03-22 09:01:50 +1100440 if (N0 >= (Nmax>>1))
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100441 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000442 for (i=0;i<C;i++)
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100443 {
Jean-Marc Valine28f25f2008-03-27 14:18:28 +1100444 for (j=0;j<N;j++)
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100445 {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000446 P[j*C+i] = Y[(Nmax-N0-j-1)*C+i];
447 E += P[j*C+i]*P[j*C+i];
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100448 }
449 }
450 } else {
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000451 for (j=0;j<C*N;j++)
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100452 {
453 P[j] = Y[j];
Jean-Marc Valin877b1972008-02-29 16:40:39 +1100454 E = MAC16_16(E, P[j],P[j]);
Jean-Marc Valin0df0eb42008-02-13 16:00:10 +1100455 }
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100456 }
Jean-Marc Valin23e82b22008-03-24 08:15:40 +1100457 g = celt_rcp(SHL32(celt_sqrt(E),9));
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000458 for (j=0;j<C*N;j++)
Jean-Marc Valinec9b6df2008-03-07 17:05:47 +1100459 P[j] = PSHR32(MULT16_16(g, P[j]),8);
Jean-Marc Valin945d0df2008-04-21 13:41:09 +1000460 for (j=0;j<C*N;j++)
Jean-Marc Valin0e20ca02008-02-11 15:33:53 +1100461 x[j] = P[j];
462}
463