blob: d9fb2a51dc7cd1a80d868603bd41c6507de485b8 [file] [log] [blame]
Jean-Marc Valin69062102012-11-08 09:42:27 -05001/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2010 Xiph.Org Foundation
3 Copyright (c) 2008 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
5/*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
Jean-Marc Valin1ecb7ea2012-11-08 11:25:20 -050034#define CELT_ENCODER_C
Jean-Marc Valin69062102012-11-08 09:42:27 -050035
36#include "os_support.h"
37#include "mdct.h"
38#include <math.h>
39#include "celt.h"
40#include "pitch.h"
41#include "bands.h"
42#include "modes.h"
43#include "entcode.h"
44#include "quant_bands.h"
45#include "rate.h"
46#include "stack_alloc.h"
47#include "mathops.h"
48#include "float_cast.h"
49#include <stdarg.h>
50#include "celt_lpc.h"
51#include "vq.h"
52
53
54/** Encoder state
55 @brief Encoder state
56 */
57struct OpusCustomEncoder {
58 const OpusCustomMode *mode; /**< Mode used by the encoder */
59 int overlap;
60 int channels;
61 int stream_channels;
62
63 int force_intra;
64 int clip;
65 int disable_pf;
66 int complexity;
67 int upsample;
68 int start, end;
69
70 opus_int32 bitrate;
71 int vbr;
72 int signalling;
73 int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */
74 int loss_rate;
75 int lsb_depth;
76
77 /* Everything beyond this point gets cleared on a reset */
78#define ENCODER_RESET_START rng
79
80 opus_uint32 rng;
81 int spread_decision;
82 opus_val32 delayedIntra;
83 int tonal_average;
84 int lastCodedBands;
85 int hf_average;
86 int tapset_decision;
87
88 int prefilter_period;
89 opus_val16 prefilter_gain;
90 int prefilter_tapset;
91#ifdef RESYNTH
92 int prefilter_period_old;
93 opus_val16 prefilter_gain_old;
94 int prefilter_tapset_old;
95#endif
96 int consec_transient;
97 AnalysisInfo analysis;
98
99 opus_val32 preemph_memE[2];
100 opus_val32 preemph_memD[2];
101
102 /* VBR-related parameters */
103 opus_int32 vbr_reservoir;
104 opus_int32 vbr_drift;
105 opus_int32 vbr_offset;
106 opus_int32 vbr_count;
107 opus_val16 overlap_max;
108 opus_val16 stereo_saving;
109 int intensity;
110
111#ifdef RESYNTH
112 /* +MAX_PERIOD/2 to make space for overlap */
113 celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
114#endif
115
116 celt_sig in_mem[1]; /* Size = channels*mode->overlap */
117 /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */
118 /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */
119 /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */
120 /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */
121};
122
123int celt_encoder_get_size(int channels)
124{
125 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
126 return opus_custom_encoder_get_size(mode, channels);
127}
128
129OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
130{
131 int size = sizeof(struct CELTEncoder)
132 + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */
133 + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
134 + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
135 /* opus_val16 oldLogE[channels*mode->nbEBands]; */
136 /* opus_val16 oldLogE2[channels*mode->nbEBands]; */
137 return size;
138}
139
140#ifdef CUSTOM_MODES
141CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
142{
143 int ret;
144 CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
145 /* init will handle the NULL case */
146 ret = opus_custom_encoder_init(st, mode, channels);
147 if (ret != OPUS_OK)
148 {
149 opus_custom_encoder_destroy(st);
150 st = NULL;
151 }
152 if (error)
153 *error = ret;
154 return st;
155}
156#endif /* CUSTOM_MODES */
157
158int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels)
159{
160 int ret;
161 ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
162 if (ret != OPUS_OK)
163 return ret;
164 st->upsample = resampling_factor(sampling_rate);
165 return OPUS_OK;
166}
167
168OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
169{
170 if (channels < 0 || channels > 2)
171 return OPUS_BAD_ARG;
172
173 if (st==NULL || mode==NULL)
174 return OPUS_ALLOC_FAIL;
175
176 OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
177
178 st->mode = mode;
179 st->overlap = mode->overlap;
180 st->stream_channels = st->channels = channels;
181
182 st->upsample = 1;
183 st->start = 0;
184 st->end = st->mode->effEBands;
185 st->signalling = 1;
186
187 st->constrained_vbr = 1;
188 st->clip = 1;
189
190 st->bitrate = OPUS_BITRATE_MAX;
191 st->vbr = 0;
192 st->force_intra = 0;
193 st->complexity = 5;
194 st->lsb_depth=24;
195
196 opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
197
198 return OPUS_OK;
199}
200
201#ifdef CUSTOM_MODES
202void opus_custom_encoder_destroy(CELTEncoder *st)
203{
204 opus_free(st);
205}
206#endif /* CUSTOM_MODES */
207
208
209static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
210 opus_val16 *tf_estimate, int *tf_chan)
211{
212 int i;
213 VARDECL(opus_val16, tmp);
214 opus_val32 mem0,mem1;
215 int is_transient = 0;
216 opus_int32 mask_metric = 0;
217 int c;
218 int tf_max;
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500219 int len2;
Jean-Marc Valin69062102012-11-08 09:42:27 -0500220 /* Table of 6*64/x, trained on real data to minimize the average error */
221 static const unsigned char inv_table[128] = {
222 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
223 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
224 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8,
225 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6,
226 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
227 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
228 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3,
229 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
230 };
231 SAVE_STACK;
232 ALLOC(tmp, len, opus_val16);
233
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500234 len2=len/2;
Jean-Marc Valin69062102012-11-08 09:42:27 -0500235 tf_max = 0;
236 for (c=0;c<C;c++)
237 {
238 opus_val32 mean;
239 opus_int32 unmask=0;
240 opus_val32 norm;
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500241 opus_val16 maxE;
Jean-Marc Valin69062102012-11-08 09:42:27 -0500242 mem0=0;
243 mem1=0;
244 /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
245 for (i=0;i<len;i++)
246 {
247 opus_val32 x,y;
248 x = SHR32(in[i+c*len],SIG_SHIFT);
249 y = ADD32(mem0, x);
250#ifdef FIXED_POINT
251 mem0 = mem1 + y - SHL32(x,1);
252 mem1 = x - SHR32(y,1);
253#else
254 mem0 = mem1 + y - 2*x;
255 mem1 = x - .5f*y;
256#endif
257 tmp[i] = EXTRACT16(SHR32(y,2));
258 /*printf("%f ", tmp[i]);*/
259 }
260 /*printf("\n");*/
261 /* First few samples are bad because we don't propagate the memory */
262 for (i=0;i<12;i++)
263 tmp[i] = 0;
264
265#ifdef FIXED_POINT
266 /* Normalize tmp to max range */
267 {
268 int shift=0;
269 shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
270 if (shift!=0)
271 {
272 for (i=0;i<len;i++)
273 tmp[i] = SHL16(tmp[i], shift);
274 }
275 }
276#endif
277
278 mean=0;
279 mem0=0;
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500280 /* Grouping by two to reduce complexity */
Jean-Marc Valin69062102012-11-08 09:42:27 -0500281 /* Forward pass to compute the post-echo threshold*/
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500282 for (i=0;i<len2;i++)
Jean-Marc Valin69062102012-11-08 09:42:27 -0500283 {
284 opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
285 mean += x2;
286#ifdef FIXED_POINT
287 /* FIXME: Use PSHR16() instead */
288 tmp[i] = mem0 + PSHR32(x2-mem0,4);
289#else
290 tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
291#endif
292 mem0 = tmp[i];
293 }
294
295 mem0=0;
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500296 maxE=0;
Jean-Marc Valin69062102012-11-08 09:42:27 -0500297 /* Backward pass to compute the pre-echo threshold */
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500298 for (i=len2-1;i>=0;i--)
Jean-Marc Valin69062102012-11-08 09:42:27 -0500299 {
300#ifdef FIXED_POINT
301 /* FIXME: Use PSHR16() instead */
302 tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
303#else
304 tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
305#endif
306 mem0 = tmp[i];
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500307 maxE = MAX16(maxE, mem0);
Jean-Marc Valin69062102012-11-08 09:42:27 -0500308 }
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500309 /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
Jean-Marc Valin69062102012-11-08 09:42:27 -0500310
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500311 /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
Jean-Marc Valin69062102012-11-08 09:42:27 -0500312 This essentially corresponds to a bitrate-normalized temporal noise-to-mask
313 ratio */
314
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500315 /* As a compromise with the old transient detector, frame energy is the
316 geometric mean of the energy and half the max */
317#ifdef FIXED_POINT
318 /* Costs two sqrt() to avoid overflows */
319 mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
320#else
321 mean = sqrt(mean * maxE*.5*len2);
322#endif
Jean-Marc Valin69062102012-11-08 09:42:27 -0500323 /* Inverse of the mean energy in Q15+6 */
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500324 norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
Jean-Marc Valin69062102012-11-08 09:42:27 -0500325 /* Compute harmonic mean discarding the unreliable boundaries
326 The data is smooth, so we only take 1/4th of the samples */
327 unmask=0;
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500328 for (i=12;i<len2-5;i+=4)
Jean-Marc Valin69062102012-11-08 09:42:27 -0500329 {
330 int id;
331#ifdef FIXED_POINT
332 id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
333#else
334 id = IMAX(0,IMIN(127,floor(64*norm*tmp[i]))); /* Do not round to nearest */
335#endif
336 unmask += inv_table[id];
337 }
338 /*printf("%d\n", unmask);*/
339 /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
Jean-Marc Valin144b6e62012-11-10 10:13:03 -0500340 unmask = 64*unmask*4/(6*(len2-17));
Jean-Marc Valin69062102012-11-08 09:42:27 -0500341 if (unmask>mask_metric)
342 {
343 *tf_chan = c;
344 mask_metric = unmask;
345 }
346 }
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500347 is_transient = mask_metric>200;
Jean-Marc Valin69062102012-11-08 09:42:27 -0500348
349 /* Arbitrary metric for VBR boost */
Jean-Marc Valin413caa02012-11-19 16:36:22 -0500350 tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
Jean-Marc Valin69062102012-11-08 09:42:27 -0500351 /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
352 *tf_estimate = QCONST16(1.f, 14) + celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),IMIN(163,tf_max)),14)-QCONST32(0.139,28)));
353 /*printf("%d %f\n", tf_max, mask_metric);*/
354 RESTORE_STACK;
355#ifdef FUZZING
356 is_transient = rand()&0x1;
357#endif
358 /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
359 return is_transient;
360}
361
362/** Apply window and compute the MDCT for all sub-frames and
363 all channels in a frame */
364static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, celt_sig * OPUS_RESTRICT out, int C, int LM)
365{
366 const int overlap = OVERLAP(mode);
367 int N;
368 int B;
369 int shift;
370 int b, c;
371 if (shortBlocks)
372 {
373 B = shortBlocks;
374 N = mode->shortMdctSize;
375 shift = mode->maxLM;
376 } else {
377 B = 1;
378 N = mode->shortMdctSize<<LM;
379 shift = mode->maxLM-LM;
380 }
381 c=0; do {
382 for (b=0;b<B;b++)
383 {
384 /* Interleaving the sub-frames while doing the MDCTs */
385 clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
386 }
387 } while (++c<C);
388}
389
390
391static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
392 int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
393{
394 int i;
395 opus_val16 coef0, coef1;
396 celt_sig m;
397 int Nu;
398
399 coef0 = coef[0];
400 coef1 = coef[1];
401
402
403 Nu = N/upsample;
404 if (upsample!=1)
405 {
406 for (i=0;i<N;i++)
407 inp[i] = 0;
408 }
409 for (i=0;i<Nu;i++)
410 {
411 celt_sig x;
412
413 x = SCALEIN(pcmp[CC*i]);
414#ifndef FIXED_POINT
415 /* Replace NaNs with zeros */
416 if (!(x==x))
417 x = 0;
418#endif
419 inp[i*upsample] = x;
420 }
421
422#ifndef FIXED_POINT
423 if (clip)
424 {
425 /* Clip input to avoid encoding non-portable files */
426 for (i=0;i<Nu;i++)
427 inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
428 }
429#endif
430 m = *mem;
431 if (coef1 == 0)
432 {
433 for (i=0;i<N;i++)
434 {
435 celt_sig x;
436 x = SHL32(inp[i], SIG_SHIFT);
437 /* Apply pre-emphasis */
438 inp[i] = x + m;
439 m = - MULT16_32_Q15(coef0, x);
440 }
441 } else {
442 opus_val16 coef2 = coef[2];
443 for (i=0;i<N;i++)
444 {
445 opus_val16 x, tmp;
446 x = inp[i];
447 /* Apply pre-emphasis */
448 tmp = MULT16_16(coef2, x);
449 inp[i] = tmp + m;
450 m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp);
451 }
452 }
453 *mem = m;
454}
455
456
457
458static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
459{
460 int i;
461 opus_val32 L1;
462 L1 = 0;
463 for (i=0;i<N;i++)
464 L1 += EXTEND32(ABS16(tmp[i]));
465 /* When in doubt, prefer good freq resolution */
466 L1 = MAC16_32_Q15(L1, LM*bias, L1);
467 return L1;
468
469}
470
Jean-Marc Valina6d663c2012-11-08 13:26:49 -0500471static int tf_analysis(const CELTMode *m, int len, int isTransient,
472 int *tf_res, int lambda, celt_norm *X, int N0, int LM,
Jean-Marc Valin69062102012-11-08 09:42:27 -0500473 int *tf_sum, opus_val16 tf_estimate, int tf_chan)
474{
475 int i;
476 VARDECL(int, metric);
477 int cost0;
478 int cost1;
479 VARDECL(int, path0);
480 VARDECL(int, path1);
481 VARDECL(celt_norm, tmp);
482 VARDECL(celt_norm, tmp_1);
Jean-Marc Valin69062102012-11-08 09:42:27 -0500483 int sel;
484 int selcost[2];
485 int tf_select=0;
486 opus_val16 bias;
487
488 SAVE_STACK;
489 bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(1.5f,14)-tf_estimate));
490 /*printf("%f ", bias);*/
491
Jean-Marc Valin69062102012-11-08 09:42:27 -0500492 ALLOC(metric, len, int);
493 ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
494 ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
495 ALLOC(path0, len, int);
496 ALLOC(path1, len, int);
497
498 *tf_sum = 0;
499 for (i=0;i<len;i++)
500 {
501 int j, k, N;
502 int narrow;
503 opus_val32 L1, best_L1;
504 int best_level=0;
505 N = (m->eBands[i+1]-m->eBands[i])<<LM;
506 /* band is too narrow to be split down to LM=-1 */
507 narrow = (m->eBands[i+1]-m->eBands[i])==1;
508 for (j=0;j<N;j++)
509 tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
510 /* Just add the right channel if we're in stereo */
511 /*if (C==2)
512 for (j=0;j<N;j++)
513 tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
514 L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
515 best_L1 = L1;
516 /* Check the -1 case for transients */
517 if (isTransient && !narrow)
518 {
519 for (j=0;j<N;j++)
520 tmp_1[j] = tmp[j];
521 haar1(tmp_1, N>>LM, 1<<LM);
522 L1 = l1_metric(tmp_1, N, LM+1, bias);
523 if (L1<best_L1)
524 {
525 best_L1 = L1;
526 best_level = -1;
527 }
528 }
529 /*printf ("%f ", L1);*/
530 for (k=0;k<LM+!(isTransient||narrow);k++)
531 {
532 int B;
533
534 if (isTransient)
535 B = (LM-k-1);
536 else
537 B = k+1;
538
539 haar1(tmp, N>>k, 1<<k);
540
541 L1 = l1_metric(tmp, N, B, bias);
542
543 if (L1 < best_L1)
544 {
545 best_L1 = L1;
546 best_level = k+1;
547 }
548 }
549 /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
550 /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
551 if (isTransient)
552 metric[i] = 2*best_level;
553 else
554 metric[i] = -2*best_level;
555 *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
556 /* For bands that can't be split to -1, set the metric to the half-way point to avoid
557 biasing the decision */
558 if (narrow && (metric[i]==0 || metric[i]==-2*LM))
559 metric[i]-=1;
560 /*printf("%d ", metric[i]);*/
561 }
562 /*printf("\n");*/
563 /* Search for the optimal tf resolution, including tf_select */
564 tf_select = 0;
565 for (sel=0;sel<2;sel++)
566 {
567 cost0 = 0;
568 cost1 = isTransient ? 0 : lambda;
569 for (i=1;i<len;i++)
570 {
571 int curr0, curr1;
572 curr0 = IMIN(cost0, cost1 + lambda);
573 curr1 = IMIN(cost0 + lambda, cost1);
574 cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
575 cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
576 }
577 cost0 = IMIN(cost0, cost1);
578 selcost[sel]=cost0;
579 }
580 /* For now, we're conservative and only allow tf_select=1 for transients.
581 * If tests confirm it's useful for non-transients, we could allow it. */
582 if (selcost[1]<selcost[0] && isTransient)
583 tf_select=1;
584 cost0 = 0;
585 cost1 = isTransient ? 0 : lambda;
586 /* Viterbi forward pass */
587 for (i=1;i<len;i++)
588 {
589 int curr0, curr1;
590 int from0, from1;
591
592 from0 = cost0;
593 from1 = cost1 + lambda;
594 if (from0 < from1)
595 {
596 curr0 = from0;
597 path0[i]= 0;
598 } else {
599 curr0 = from1;
600 path0[i]= 1;
601 }
602
603 from0 = cost0 + lambda;
604 from1 = cost1;
605 if (from0 < from1)
606 {
607 curr1 = from0;
608 path1[i]= 0;
609 } else {
610 curr1 = from1;
611 path1[i]= 1;
612 }
613 cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
614 cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
615 }
616 tf_res[len-1] = cost0 < cost1 ? 0 : 1;
617 /* Viterbi backward pass to check the decisions */
618 for (i=len-2;i>=0;i--)
619 {
620 if (tf_res[i+1] == 1)
621 tf_res[i] = path1[i+1];
622 else
623 tf_res[i] = path0[i+1];
624 }
625 /*printf("%d %f\n", *tf_sum, tf_estimate);*/
626 RESTORE_STACK;
627#ifdef FUZZING
628 tf_select = rand()&0x1;
629 tf_res[0] = rand()&0x1;
630 for (i=1;i<len;i++)
631 tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
632#endif
633 return tf_select;
634}
635
636static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
637{
638 int curr, i;
639 int tf_select_rsv;
640 int tf_changed;
641 int logp;
642 opus_uint32 budget;
643 opus_uint32 tell;
644 budget = enc->storage*8;
645 tell = ec_tell(enc);
646 logp = isTransient ? 2 : 4;
647 /* Reserve space to code the tf_select decision. */
648 tf_select_rsv = LM>0 && tell+logp+1 <= budget;
649 budget -= tf_select_rsv;
650 curr = tf_changed = 0;
651 for (i=start;i<end;i++)
652 {
653 if (tell+logp<=budget)
654 {
655 ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
656 tell = ec_tell(enc);
657 curr = tf_res[i];
658 tf_changed |= curr;
659 }
660 else
661 tf_res[i] = curr;
662 logp = isTransient ? 4 : 5;
663 }
664 /* Only code tf_select if it would actually make a difference. */
665 if (tf_select_rsv &&
666 tf_select_table[LM][4*isTransient+0+tf_changed]!=
667 tf_select_table[LM][4*isTransient+2+tf_changed])
668 ec_enc_bit_logp(enc, tf_select, 1);
669 else
670 tf_select = 0;
671 for (i=start;i<end;i++)
672 tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
673 /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
674}
675
676
677static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
678 const opus_val16 *bandLogE, int end, int LM, int C, int N0,
679 AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
680 int intensity)
681{
682 int i;
683 opus_val32 diff=0;
684 int c;
685 int trim_index = 5;
686 opus_val16 trim = QCONST16(5.f, 8);
687 opus_val16 logXC, logXC2;
688 if (C==2)
689 {
690 opus_val16 sum = 0; /* Q10 */
691 opus_val16 minXC; /* Q10 */
692 /* Compute inter-channel correlation for low frequencies */
693 for (i=0;i<8;i++)
694 {
695 int j;
696 opus_val32 partial = 0;
697 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
698 partial = MAC16_16(partial, X[j], X[N0+j]);
699 sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
700 }
701 sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
702 sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
703 minXC = sum;
704 for (i=8;i<intensity;i++)
705 {
706 int j;
707 opus_val32 partial = 0;
708 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
709 partial = MAC16_16(partial, X[j], X[N0+j]);
710 minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
711 }
712 minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
713 /*printf ("%f\n", sum);*/
714 if (sum > QCONST16(.995f,10))
715 trim_index-=4;
716 else if (sum > QCONST16(.92f,10))
717 trim_index-=3;
718 else if (sum > QCONST16(.85f,10))
719 trim_index-=2;
720 else if (sum > QCONST16(.8f,10))
721 trim_index-=1;
722 /* mid-side savings estimations based on the LF average*/
723 logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
724 /* mid-side savings estimations based on min correlation */
725 logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
726#ifdef FIXED_POINT
727 /* Compensate for Q20 vs Q14 input and convert output to Q8 */
728 logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
729 logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
730#endif
731
732 trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
733 *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
734 }
735
736 /* Estimate spectral tilt */
737 c=0; do {
738 for (i=0;i<end-1;i++)
739 {
740 diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
741 }
742 } while (++c<C);
743 diff /= C*(end-1);
744 /*printf("%f\n", diff);*/
745 if (diff > QCONST16(2.f, DB_SHIFT))
746 trim_index--;
747 if (diff > QCONST16(8.f, DB_SHIFT))
748 trim_index--;
749 if (diff < -QCONST16(4.f, DB_SHIFT))
750 trim_index++;
751 if (diff < -QCONST16(10.f, DB_SHIFT))
752 trim_index++;
753 trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
754 trim -= 2*SHR16(tf_estimate-QCONST16(1.f,14), 14-8);
755#ifndef FIXED_POINT
756 if (analysis->valid)
757 {
758 trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05)));
759 }
760#endif
761
762#ifdef FIXED_POINT
763 trim_index = PSHR32(trim, 8);
764#else
765 trim_index = floor(.5+trim);
766#endif
767 if (trim_index<0)
768 trim_index = 0;
769 if (trim_index>10)
770 trim_index = 10;
771 /*printf("%d\n", trim_index);*/
772#ifdef FUZZING
773 trim_index = rand()%11;
774#endif
775 return trim_index;
776}
777
778static int stereo_analysis(const CELTMode *m, const celt_norm *X,
779 int LM, int N0)
780{
781 int i;
782 int thetas;
783 opus_val32 sumLR = EPSILON, sumMS = EPSILON;
784
785 /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
786 for (i=0;i<13;i++)
787 {
788 int j;
789 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
790 {
791 opus_val32 L, R, M, S;
792 /* We cast to 32-bit first because of the -32768 case */
793 L = EXTEND32(X[j]);
794 R = EXTEND32(X[N0+j]);
795 M = ADD32(L, R);
796 S = SUB32(L, R);
797 sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
798 sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
799 }
800 }
801 sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
802 thetas = 13;
803 /* We don't need thetas for lower bands with LM<=1 */
804 if (LM<=1)
805 thetas -= 8;
806 return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
807 > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
808}
809
Jean-Marc Valin10b30e72012-11-10 00:44:03 -0500810static int dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
811 int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
812 int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
813 int effectiveBytes, opus_int32 *tot_boost_)
814{
815 int i, c;
816 opus_int32 tot_boost=0;
817 opus_val16 maxDepth;
818 VARDECL(opus_val16, follower);
819 VARDECL(opus_val16, noise_floor);
820 SAVE_STACK;
821 ALLOC(follower, C*nbEBands, opus_val16);
822 ALLOC(noise_floor, C*nbEBands, opus_val16);
823 for (i=0;i<nbEBands;i++)
824 offsets[i] = 0;
825 /* Dynamic allocation code */
826 maxDepth=-QCONST16(32.f, DB_SHIFT);
827 for (i=0;i<end;i++)
828 {
829 /* Noise floor must take into account eMeans, the depth, the width of the bands
830 and the preemphasis filter (approx. square of bark band ID) */
831 noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
832 +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
833 +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
834 }
835 c=0;do
836 {
837 for (i=0;i<end;i++)
838 maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
839 } while (++c<C);
840 /* Make sure that dynamic allocation can't make us bust the budget */
841 if (effectiveBytes > 50 && LM>=1)
842 {
843 int last=0;
844 c=0;do
845 {
846 follower[c*nbEBands] = bandLogE2[c*nbEBands];
847 for (i=1;i<end;i++)
848 {
849 /* The last band to be at least 3 dB higher than the previous one
850 is the last we'll consider. Otherwise, we run into problems on
851 bandlimited signals. */
852 if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
853 last=i;
854 follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
855 }
856 for (i=last-1;i>=0;i--)
857 follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
858 for (i=0;i<end;i++)
859 follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
860 } while (++c<C);
861 if (C==2)
862 {
863 for (i=start;i<end;i++)
864 {
865 /* Consider 24 dB "cross-talk" */
866 follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT));
867 follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
868 follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
869 }
870 } else {
871 for (i=start;i<end;i++)
872 {
873 follower[i] = MAX16(0, bandLogE[i]-follower[i]);
874 }
875 }
876 /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
877 if ((!vbr || constrained_vbr)&&!isTransient)
878 {
879 for (i=start;i<end;i++)
880 follower[i] = HALF16(follower[i]);
881 }
882 for (i=start;i<end;i++)
883 {
884 int width;
885 int boost;
886 int boost_bits;
887
888 if (i<8)
889 follower[i] *= 2;
890 if (i>=12)
891 follower[i] = HALF16(follower[i]);
892 follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
893
894 width = C*(eBands[i+1]-eBands[i])<<LM;
895 if (width<6)
896 {
897 boost = SHR32(EXTEND32(follower[i]),DB_SHIFT);
898 boost_bits = boost*width<<BITRES;
899 } else if (width > 48) {
900 boost = SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
901 boost_bits = (boost*width<<BITRES)/8;
902 } else {
903 boost = SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
904 boost_bits = boost*6<<BITRES;
905 }
906 /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
907 if ((!vbr || (constrained_vbr&&!isTransient))
908 && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
909 {
910 offsets[i] = 0;
911 break;
912 } else {
913 offsets[i] = boost;
914 tot_boost += boost_bits;
915 }
916 }
917 }
918 *tot_boost_ = tot_boost;
919 RESTORE_STACK;
920 return maxDepth;
921}
922
923
Jean-Marc Valin69062102012-11-08 09:42:27 -0500924static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
925 int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
926{
927 int c;
928 VARDECL(celt_sig, _pre);
929 celt_sig *pre[2];
930 const CELTMode *mode;
931 int pitch_index;
932 opus_val16 gain1;
933 opus_val16 pf_threshold;
934 int pf_on;
935 int qg;
936 SAVE_STACK;
937
938 mode = st->mode;
939 ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
940
941 pre[0] = _pre;
942 pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
943
944
945 c=0; do {
946 OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
947 OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
948 } while (++c<CC);
949
950 if (enabled)
951 {
952 VARDECL(opus_val16, pitch_buf);
953 ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
954
955 pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
956 /* Don't search for the fir last 1.5 octave of the range because
957 there's too many false-positives due to short-term correlation */
958 pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
959 COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
960 pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
961
962 gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
963 N, &pitch_index, st->prefilter_period, st->prefilter_gain);
964 if (pitch_index > COMBFILTER_MAXPERIOD-2)
965 pitch_index = COMBFILTER_MAXPERIOD-2;
966 gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
967 /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
968 if (st->loss_rate>2)
969 gain1 = HALF32(gain1);
970 if (st->loss_rate>4)
971 gain1 = HALF32(gain1);
972 if (st->loss_rate>8)
973 gain1 = 0;
974 } else {
975 gain1 = 0;
976 pitch_index = COMBFILTER_MINPERIOD;
977 }
978
979 /* Gain threshold for enabling the prefilter/postfilter */
980 pf_threshold = QCONST16(.2f,15);
981
982 /* Adjusting the threshold based on rate and continuity */
983 if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
984 pf_threshold += QCONST16(.2f,15);
985 if (nbAvailableBytes<25)
986 pf_threshold += QCONST16(.1f,15);
987 if (nbAvailableBytes<35)
988 pf_threshold += QCONST16(.1f,15);
989 if (st->prefilter_gain > QCONST16(.4f,15))
990 pf_threshold -= QCONST16(.1f,15);
991 if (st->prefilter_gain > QCONST16(.55f,15))
992 pf_threshold -= QCONST16(.1f,15);
993
994 /* Hard threshold at 0.2 */
995 pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
996 if (gain1<pf_threshold)
997 {
998 gain1 = 0;
999 pf_on = 0;
1000 qg = 0;
1001 } else {
1002 /*This block is not gated by a total bits check only because
1003 of the nbAvailableBytes check above.*/
1004 if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
1005 gain1=st->prefilter_gain;
1006
1007#ifdef FIXED_POINT
1008 qg = ((gain1+1536)>>10)/3-1;
1009#else
1010 qg = (int)floor(.5f+gain1*32/3)-1;
1011#endif
1012 qg = IMAX(0, IMIN(7, qg));
1013 gain1 = QCONST16(0.09375f,15)*(qg+1);
1014 pf_on = 1;
1015 }
1016 /*printf("%d %f\n", pitch_index, gain1);*/
1017
1018 c=0; do {
1019 int offset = mode->shortMdctSize-st->overlap;
1020 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
1021 OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
1022 if (offset)
1023 comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
1024 st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
1025 st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
1026
1027 comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
1028 st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
1029 st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
1030 OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
1031
1032 if (N>COMBFILTER_MAXPERIOD)
1033 {
1034 OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
1035 } else {
1036 OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
1037 OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
1038 }
1039 } while (++c<CC);
1040
1041 RESTORE_STACK;
1042 *gain = gain1;
1043 *pitch = pitch_index;
1044 *qgain = qg;
1045 return pf_on;
1046}
1047
1048int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
1049{
1050 int i, c, N;
1051 opus_int32 bits;
1052 ec_enc _enc;
1053 VARDECL(celt_sig, in);
1054 VARDECL(celt_sig, freq);
1055 VARDECL(celt_norm, X);
1056 VARDECL(celt_ener, bandE);
1057 VARDECL(opus_val16, bandLogE);
1058 VARDECL(opus_val16, bandLogE2);
1059 VARDECL(int, fine_quant);
1060 VARDECL(opus_val16, error);
1061 VARDECL(int, pulses);
1062 VARDECL(int, cap);
1063 VARDECL(int, offsets);
1064 VARDECL(int, fine_priority);
1065 VARDECL(int, tf_res);
1066 VARDECL(unsigned char, collapse_masks);
1067 celt_sig *prefilter_mem;
1068 opus_val16 *oldBandE, *oldLogE, *oldLogE2;
1069 int shortBlocks=0;
1070 int isTransient=0;
1071 const int CC = st->channels;
1072 const int C = st->stream_channels;
1073 int LM, M;
1074 int tf_select;
1075 int nbFilledBytes, nbAvailableBytes;
1076 int effEnd;
1077 int codedBands;
1078 int tf_sum;
1079 int alloc_trim;
1080 int pitch_index=COMBFILTER_MINPERIOD;
1081 opus_val16 gain1 = 0;
1082 int dual_stereo=0;
1083 int effectiveBytes;
1084 int dynalloc_logp;
1085 opus_int32 vbr_rate;
1086 opus_int32 total_bits;
1087 opus_int32 total_boost;
1088 opus_int32 balance;
1089 opus_int32 tell;
1090 int prefilter_tapset=0;
1091 int pf_on;
1092 int anti_collapse_rsv;
1093 int anti_collapse_on=0;
1094 int silence=0;
1095 int tf_chan = 0;
1096 opus_val16 tf_estimate;
1097 int pitch_change=0;
Jean-Marc Valin10b30e72012-11-10 00:44:03 -05001098 opus_int32 tot_boost;
Jean-Marc Valin69062102012-11-08 09:42:27 -05001099 opus_val16 sample_max;
1100 opus_val16 maxDepth;
1101 const OpusCustomMode *mode;
1102 int nbEBands;
1103 int overlap;
1104 const opus_int16 *eBands;
1105 int secondMdct;
1106 ALLOC_STACK;
1107
1108 mode = st->mode;
1109 nbEBands = mode->nbEBands;
1110 overlap = mode->overlap;
1111 eBands = mode->eBands;
1112 tf_estimate = QCONST16(1.0f,14);
1113 if (nbCompressedBytes<2 || pcm==NULL)
1114 return OPUS_BAD_ARG;
1115
1116 frame_size *= st->upsample;
1117 for (LM=0;LM<=mode->maxLM;LM++)
1118 if (mode->shortMdctSize<<LM==frame_size)
1119 break;
1120 if (LM>mode->maxLM)
1121 return OPUS_BAD_ARG;
1122 M=1<<LM;
1123 N = M*mode->shortMdctSize;
1124
1125 prefilter_mem = st->in_mem+CC*(st->overlap);
1126 oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
1127 oldLogE = oldBandE + CC*nbEBands;
1128 oldLogE2 = oldLogE + CC*nbEBands;
1129
1130 if (enc==NULL)
1131 {
1132 tell=1;
1133 nbFilledBytes=0;
1134 } else {
1135 tell=ec_tell(enc);
1136 nbFilledBytes=(tell+4)>>3;
1137 }
1138
1139#ifdef CUSTOM_MODES
1140 if (st->signalling && enc==NULL)
1141 {
1142 int tmp = (mode->effEBands-st->end)>>1;
1143 st->end = IMAX(1, mode->effEBands-tmp);
1144 compressed[0] = tmp<<5;
1145 compressed[0] |= LM<<3;
1146 compressed[0] |= (C==2)<<2;
1147 /* Convert "standard mode" to Opus header */
1148 if (mode->Fs==48000 && mode->shortMdctSize==120)
1149 {
1150 int c0 = toOpus(compressed[0]);
1151 if (c0<0)
1152 return OPUS_BAD_ARG;
1153 compressed[0] = c0;
1154 }
1155 compressed++;
1156 nbCompressedBytes--;
1157 }
1158#else
1159 celt_assert(st->signalling==0);
1160#endif
1161
1162 /* Can't produce more than 1275 output bytes */
1163 nbCompressedBytes = IMIN(nbCompressedBytes,1275);
1164 nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
1165
1166 if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
1167 {
1168 opus_int32 den=mode->Fs>>BITRES;
1169 vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
1170#ifdef CUSTOM_MODES
1171 if (st->signalling)
1172 vbr_rate -= 8<<BITRES;
1173#endif
1174 effectiveBytes = vbr_rate>>(3+BITRES);
1175 } else {
1176 opus_int32 tmp;
1177 vbr_rate = 0;
1178 tmp = st->bitrate*frame_size;
1179 if (tell>1)
1180 tmp += tell;
1181 if (st->bitrate!=OPUS_BITRATE_MAX)
1182 nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
1183 (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
1184 effectiveBytes = nbCompressedBytes;
1185 }
1186
1187 if (enc==NULL)
1188 {
1189 ec_enc_init(&_enc, compressed, nbCompressedBytes);
1190 enc = &_enc;
1191 }
1192
1193 if (vbr_rate>0)
1194 {
1195 /* Computes the max bit-rate allowed in VBR mode to avoid violating the
1196 target rate and buffering.
1197 We must do this up front so that bust-prevention logic triggers
1198 correctly if we don't have enough bits. */
1199 if (st->constrained_vbr)
1200 {
1201 opus_int32 vbr_bound;
1202 opus_int32 max_allowed;
1203 /* We could use any multiple of vbr_rate as bound (depending on the
1204 delay).
1205 This is clamped to ensure we use at least two bytes if the encoder
1206 was entirely empty, but to allow 0 in hybrid mode. */
1207 vbr_bound = vbr_rate;
1208 max_allowed = IMIN(IMAX(tell==1?2:0,
1209 (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
1210 nbAvailableBytes);
1211 if(max_allowed < nbAvailableBytes)
1212 {
1213 nbCompressedBytes = nbFilledBytes+max_allowed;
1214 nbAvailableBytes = max_allowed;
1215 ec_enc_shrink(enc, nbCompressedBytes);
1216 }
1217 }
1218 }
1219 total_bits = nbCompressedBytes*8;
1220
1221 effEnd = st->end;
1222 if (effEnd > mode->effEBands)
1223 effEnd = mode->effEBands;
1224
1225 ALLOC(in, CC*(N+st->overlap), celt_sig);
1226
1227 sample_max=MAX16(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
1228 st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
1229 sample_max=MAX16(sample_max, st->overlap_max);
1230#ifdef FIXED_POINT
1231 silence = (sample_max==0);
1232#else
1233 silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
1234#endif
1235#ifdef FUZZING
1236 if ((rand()&0x3F)==0)
1237 silence = 1;
1238#endif
1239 if (tell==1)
1240 ec_enc_bit_logp(enc, silence, 15);
1241 else
1242 silence=0;
1243 if (silence)
1244 {
1245 /*In VBR mode there is no need to send more than the minimum. */
1246 if (vbr_rate>0)
1247 {
1248 effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
1249 total_bits=nbCompressedBytes*8;
1250 nbAvailableBytes=2;
1251 ec_enc_shrink(enc, nbCompressedBytes);
1252 }
1253 /* Pretend we've filled all the remaining bits with zeros
1254 (that's what the initialiser did anyway) */
1255 tell = nbCompressedBytes*8;
1256 enc->nbits_total+=tell-ec_tell(enc);
1257 }
1258 c=0; do {
1259 preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
1260 mode->preemph, st->preemph_memE+c, st->clip);
1261 } while (++c<CC);
1262
1263
1264
1265 /* Find pitch period and gain */
1266 {
1267 int enabled;
1268 int qg;
1269 enabled = nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5;
1270
1271 prefilter_tapset = st->tapset_decision;
1272 pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
1273 if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
1274 && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
1275 pitch_change = 1;
1276 if (pf_on==0)
1277 {
1278 if(st->start==0 && tell+16<=total_bits)
1279 ec_enc_bit_logp(enc, 0, 1);
1280 } else {
1281 /*This block is not gated by a total bits check only because
1282 of the nbAvailableBytes check above.*/
1283 int octave;
1284 ec_enc_bit_logp(enc, 1, 1);
1285 pitch_index += 1;
1286 octave = EC_ILOG(pitch_index)-5;
1287 ec_enc_uint(enc, octave, 6);
1288 ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
1289 pitch_index -= 1;
1290 ec_enc_bits(enc, qg, 3);
1291 ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
1292 }
1293 }
1294
1295 isTransient = 0;
1296 shortBlocks = 0;
1297 if (LM>0 && ec_tell(enc)+3<=total_bits)
1298 {
1299 if (st->complexity > 1)
1300 {
1301 isTransient = transient_analysis(in, N+st->overlap, CC,
1302 &tf_estimate, &tf_chan);
1303 if (isTransient)
1304 shortBlocks = M;
1305 }
1306 ec_enc_bit_logp(enc, isTransient, 3);
1307 }
1308
1309 ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
1310 ALLOC(bandE,nbEBands*CC, celt_ener);
1311 ALLOC(bandLogE,nbEBands*CC, opus_val16);
1312
1313 secondMdct = shortBlocks && st->complexity>=8;
1314 ALLOC(bandLogE2, C*nbEBands, opus_val16);
1315 if (secondMdct)
1316 {
1317 compute_mdcts(mode, 0, in, freq, CC, LM);
1318 if (CC==2&&C==1)
1319 {
1320 for (i=0;i<N;i++)
1321 freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
1322 }
1323 if (st->upsample != 1)
1324 {
1325 c=0; do
1326 {
1327 int bound = N/st->upsample;
1328 for (i=0;i<bound;i++)
1329 freq[c*N+i] *= st->upsample;
1330 for (;i<N;i++)
1331 freq[c*N+i] = 0;
1332 } while (++c<C);
1333 }
1334 compute_band_energies(mode, freq, bandE, effEnd, C, M);
1335 amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
1336 for (i=0;i<C*nbEBands;i++)
1337 bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
1338 }
1339
1340 compute_mdcts(mode, shortBlocks, in, freq, CC, LM);
1341
1342 if (CC==2&&C==1)
1343 {
1344 for (i=0;i<N;i++)
1345 freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
1346 tf_chan = 0;
1347 }
1348 if (st->upsample != 1)
1349 {
1350 c=0; do
1351 {
1352 int bound = N/st->upsample;
1353 for (i=0;i<bound;i++)
1354 freq[c*N+i] *= st->upsample;
1355 for (;i<N;i++)
1356 freq[c*N+i] = 0;
1357 } while (++c<C);
1358 }
1359 compute_band_energies(mode, freq, bandE, effEnd, C, M);
1360
1361 amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
1362 /*for (i=0;i<21;i++)
1363 printf("%f ", bandLogE[i]);
1364 printf("\n");*/
1365
1366 if (!secondMdct)
1367 {
1368 for (i=0;i<C*nbEBands;i++)
1369 bandLogE2[i] = bandLogE[i];
1370 }
1371
1372 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
1373
1374 /* Band normalisation */
1375 normalise_bands(mode, freq, X, bandE, effEnd, C, M);
1376
1377 ALLOC(tf_res, nbEBands, int);
Jean-Marc Valina6d663c2012-11-08 13:26:49 -05001378 /* Disable variable tf resolution for hybrid and at very low bitrate */
1379 if (effectiveBytes>=15*C && st->start==0)
1380 {
1381 int lambda;
1382 if (effectiveBytes<40)
1383 lambda = 12;
1384 else if (effectiveBytes<60)
1385 lambda = 6;
1386 else if (effectiveBytes<100)
1387 lambda = 4;
1388 else
1389 lambda = 3;
1390 lambda*=2;
1391 tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
1392 for (i=effEnd;i<st->end;i++)
1393 tf_res[i] = tf_res[effEnd-1];
1394 } else {
1395 tf_sum = 0;
1396 for (i=0;i<st->end;i++)
1397 tf_res[i] = isTransient;
1398 tf_select=0;
1399 }
Jean-Marc Valin69062102012-11-08 09:42:27 -05001400
1401 ALLOC(error, C*nbEBands, opus_val16);
1402 quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
1403 oldBandE, total_bits, error, enc,
1404 C, LM, nbAvailableBytes, st->force_intra,
1405 &st->delayedIntra, st->complexity >= 4, st->loss_rate);
1406
1407 tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
1408
1409 if (ec_tell(enc)+4<=total_bits)
1410 {
Jean-Marc Valin1fd1d7d2012-11-08 17:22:07 -05001411 if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
Jean-Marc Valin69062102012-11-08 09:42:27 -05001412 {
1413 if (st->complexity == 0)
1414 st->spread_decision = SPREAD_NONE;
Jean-Marc Valin1fd1d7d2012-11-08 17:22:07 -05001415 else
1416 st->spread_decision = SPREAD_NORMAL;
Jean-Marc Valin69062102012-11-08 09:42:27 -05001417 } else {
Jean-Marc Valin41fd7a12012-12-12 14:41:29 -05001418 /* Disable new spreading+tapset estimator until we can show it works
1419 better than the old one. So far it seems like spreading_decision()
1420 works best. */
1421 if (0&&st->analysis.valid)
Jean-Marc Valin69062102012-11-08 09:42:27 -05001422 {
1423 static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
1424 static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
1425 static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
1426 static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
1427 st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
1428 st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
1429 } else {
1430 st->spread_decision = spreading_decision(mode, X,
1431 &st->tonal_average, st->spread_decision, &st->hf_average,
1432 &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
1433 }
1434 /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
1435 /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
1436 }
1437 ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
1438 }
1439
Jean-Marc Valin69062102012-11-08 09:42:27 -05001440 ALLOC(offsets, nbEBands, int);
1441
Jean-Marc Valin10b30e72012-11-10 00:44:03 -05001442 maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
1443 st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
1444 eBands, LM, effectiveBytes, &tot_boost);
1445 ALLOC(cap, nbEBands, int);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001446 init_caps(mode,cap,LM,C);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001447
Jean-Marc Valin69062102012-11-08 09:42:27 -05001448 dynalloc_logp = 6;
1449 total_bits<<=BITRES;
1450 total_boost = 0;
1451 tell = ec_tell_frac(enc);
1452 for (i=st->start;i<st->end;i++)
1453 {
1454 int width, quanta;
1455 int dynalloc_loop_logp;
1456 int boost;
1457 int j;
1458 width = C*(eBands[i+1]-eBands[i])<<LM;
1459 /* quanta is 6 bits, but no more than 1 bit/sample
1460 and no less than 1/8 bit/sample */
1461 quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
1462 dynalloc_loop_logp = dynalloc_logp;
1463 boost = 0;
1464 for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
1465 && boost < cap[i]; j++)
1466 {
1467 int flag;
1468 flag = j<offsets[i];
1469 ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
1470 tell = ec_tell_frac(enc);
1471 if (!flag)
1472 break;
1473 boost += quanta;
1474 total_boost += quanta;
1475 dynalloc_loop_logp = 1;
1476 }
1477 /* Making dynalloc more likely */
1478 if (j)
1479 dynalloc_logp = IMAX(2, dynalloc_logp-1);
1480 offsets[i] = boost;
1481 }
1482
1483 if (C==2)
1484 {
1485 int effectiveRate;
1486
1487 static const opus_val16 intensity_thresholds[21]=
1488 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/
1489 { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130};
1490 static const opus_val16 intensity_histeresis[21]=
1491 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 8, 12};
1492
1493 /* Always use MS for 2.5 ms frames until we can do a better analysis */
1494 if (LM!=0)
1495 dual_stereo = stereo_analysis(mode, X, LM, N);
1496
1497 /* Account for coarse energy */
1498 effectiveRate = (8*effectiveBytes - 80)>>LM;
1499
1500 /* effectiveRate in kb/s */
1501 effectiveRate = 2*effectiveRate/5;
1502
1503 st->intensity = hysteresis_decision(effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity);
1504 st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
1505 }
1506
1507 alloc_trim = 5;
1508 if (tell+(6<<BITRES) <= total_bits - total_boost)
1509 {
1510 alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
1511 st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
1512 ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
1513 tell = ec_tell_frac(enc);
1514 }
1515
1516 /* Variable bitrate */
1517 if (vbr_rate>0)
1518 {
1519 opus_val16 alpha;
1520 opus_int32 delta;
1521 /* The target rate in 8th bits per frame */
1522 opus_int32 target, base_target;
1523 opus_int32 min_allowed;
1524 int coded_bins;
1525 int coded_bands;
1526 int lm_diff = mode->maxLM - LM;
1527 coded_bands = st->lastCodedBands ? st->lastCodedBands : nbEBands;
1528 coded_bins = eBands[coded_bands]<<LM;
1529 if (C==2)
1530 coded_bins += eBands[IMIN(st->intensity, coded_bands)]<<LM;
1531
1532 /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
1533 The CELT allocator will just not be able to use more than that anyway. */
1534 nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
1535 target = vbr_rate - ((40*C+20)<<BITRES);
1536 base_target = target;
1537
1538 if (st->constrained_vbr)
1539 target += (st->vbr_offset>>lm_diff);
1540
1541 /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
1542#ifndef FIXED_POINT
1543 if (st->analysis.valid && st->analysis.activity<.4)
1544 target -= (coded_bins<<BITRES)*1*(.4-st->analysis.activity);
1545#endif
1546 /* Stereo savings */
1547 if (C==2)
1548 {
1549 int coded_stereo_bands;
1550 int coded_stereo_dof;
Jean-Marc Valin1122d292012-12-12 15:16:27 -05001551 opus_val16 max_frac;
Jean-Marc Valin69062102012-11-08 09:42:27 -05001552 coded_stereo_bands = IMIN(st->intensity, coded_bands);
1553 coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
Jean-Marc Valin1122d292012-12-12 15:16:27 -05001554 /* Maximum fraction of the bits we can save if the signal is mono. */
1555 max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001556 /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
Jean-Marc Valin1122d292012-12-12 15:16:27 -05001557 target -= MIN32(MULT16_32_Q15(max_frac,target),
1558 SHR16(MULT16_16(st->stereo_saving,(coded_stereo_dof<<BITRES)),8));
1559 target += MULT16_16_Q15(QCONST16(0.1f,15),coded_stereo_dof<<BITRES);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001560 }
1561 /* Limits starving of other bands when using dynalloc */
1562 target += tot_boost;
1563 /* Compensates for the average transient boost */
1564 target = MULT16_32_Q15(QCONST16(0.96f,15),target);
1565 /* Apply transient boost */
1566 target = SHL32(MULT16_32_Q15(tf_estimate, target),1);
1567
1568#ifndef FIXED_POINT
1569 /* Apply tonality boost */
1570 if (st->analysis.valid) {
1571 int tonal_target;
1572 float tonal;
1573
1574 /* Compensates for the average tonality boost */
Jean-Marc Valinb33db8f2012-12-12 14:45:16 -05001575 target -= MULT16_16_Q15(QCONST16(0.11f,15),coded_bins<<BITRES);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001576
Jean-Marc Valinb33db8f2012-12-12 14:45:16 -05001577 tonal = MAX16(0,st->analysis.tonality-.15);
1578 tonal_target = target + (coded_bins<<BITRES)*1.2f*tonal;
Jean-Marc Valin69062102012-11-08 09:42:27 -05001579 if (pitch_change)
1580 tonal_target += (coded_bins<<BITRES)*.8;
1581 /*printf("%f %f ", st->analysis.tonality, tonal);*/
Jean-Marc Valinb33db8f2012-12-12 14:45:16 -05001582 target = tonal_target;
Jean-Marc Valin69062102012-11-08 09:42:27 -05001583 }
1584#endif
1585
1586 {
1587 opus_int32 floor_depth;
1588 int bins;
1589 bins = eBands[nbEBands-2]<<LM;
1590 /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
1591 floor_depth = SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
1592 floor_depth = IMAX(floor_depth, target>>2);
1593 target = IMIN(target, floor_depth);
1594 /*printf("%f %d\n", maxDepth, floor_depth);*/
1595 }
1596
1597 if (st->constrained_vbr || st->bitrate<64000)
1598 {
1599 opus_val16 rate_factor;
1600#ifdef FIXED_POINT
1601 rate_factor = MAX16(0,(st->bitrate-32000));
1602#else
1603 rate_factor = MAX16(0,(1.f/32768)*(st->bitrate-32000));
1604#endif
1605 if (st->constrained_vbr)
1606 rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
1607 target = base_target + MULT16_32_Q15(rate_factor, target-base_target);
1608
1609 }
1610 /* Don't allow more than doubling the rate */
1611 target = IMIN(2*base_target, target);
1612
1613 /* The current offset is removed from the target and the space used
1614 so far is added*/
1615 target=target+tell;
1616 /* In VBR mode the frame size must not be reduced so much that it would
1617 result in the encoder running out of bits.
1618 The margin of 2 bytes ensures that none of the bust-prevention logic
1619 in the decoder will have triggered so far. */
1620 min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
1621
1622 nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
1623 nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
1624 nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
1625
1626 /* By how much did we "miss" the target on that frame */
1627 delta = target - vbr_rate;
1628
1629 target=nbAvailableBytes<<(BITRES+3);
1630
1631 /*If the frame is silent we don't adjust our drift, otherwise
1632 the encoder will shoot to very high rates after hitting a
1633 span of silence, but we do allow the bitres to refill.
1634 This means that we'll undershoot our target in CVBR/VBR modes
1635 on files with lots of silence. */
1636 if(silence)
1637 {
1638 nbAvailableBytes = 2;
1639 target = 2*8<<BITRES;
1640 delta = 0;
1641 }
1642
1643 if (st->vbr_count < 970)
1644 {
1645 st->vbr_count++;
1646 alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
1647 } else
1648 alpha = QCONST16(.001f,15);
1649 /* How many bits have we used in excess of what we're allowed */
1650 if (st->constrained_vbr)
1651 st->vbr_reservoir += target - vbr_rate;
1652 /*printf ("%d\n", st->vbr_reservoir);*/
1653
1654 /* Compute the offset we need to apply in order to reach the target */
1655 if (st->constrained_vbr)
1656 {
1657 st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
1658 st->vbr_offset = -st->vbr_drift;
1659 }
1660 /*printf ("%d\n", st->vbr_drift);*/
1661
1662 if (st->constrained_vbr && st->vbr_reservoir < 0)
1663 {
1664 /* We're under the min value -- increase rate */
1665 int adjust = (-st->vbr_reservoir)/(8<<BITRES);
1666 /* Unless we're just coding silence */
1667 nbAvailableBytes += silence?0:adjust;
1668 st->vbr_reservoir = 0;
1669 /*printf ("+%d\n", adjust);*/
1670 }
1671 nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
1672 /*printf("%d\n", nbCompressedBytes*50*8);*/
1673 /* This moves the raw bits to take into account the new compressed size */
1674 ec_enc_shrink(enc, nbCompressedBytes);
1675 }
1676
1677 /* Bit allocation */
1678 ALLOC(fine_quant, nbEBands, int);
1679 ALLOC(pulses, nbEBands, int);
1680 ALLOC(fine_priority, nbEBands, int);
1681
1682 /* bits = packet size - where we are - safety*/
1683 bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
1684 anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
1685 bits -= anti_collapse_rsv;
1686 codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
1687 alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
1688 fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
1689 st->lastCodedBands = codedBands;
1690
1691 quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
1692
1693#ifdef MEASURE_NORM_MSE
1694 float X0[3000];
1695 float bandE0[60];
1696 c=0; do
1697 for (i=0;i<N;i++)
1698 X0[i+c*N] = X[i+c*N];
1699 while (++c<C);
1700 for (i=0;i<C*nbEBands;i++)
1701 bandE0[i] = bandE[i];
1702#endif
1703
1704 /* Residual quantisation */
1705 ALLOC(collapse_masks, C*nbEBands, unsigned char);
1706 quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
1707 bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
1708 nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
1709
1710 if (anti_collapse_rsv > 0)
1711 {
1712 anti_collapse_on = st->consec_transient<2;
1713#ifdef FUZZING
1714 anti_collapse_on = rand()&0x1;
1715#endif
1716 ec_enc_bits(enc, anti_collapse_on, 1);
1717 }
1718 quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
1719
1720 if (silence)
1721 {
1722 for (i=0;i<C*nbEBands;i++)
1723 oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
1724 }
1725
1726#ifdef RESYNTH
1727 /* Re-synthesis of the coded audio if required */
1728 {
1729 celt_sig *out_mem[2];
1730
1731 log2Amp(mode, st->start, st->end, bandE, oldBandE, C);
1732 if (silence)
1733 {
1734 for (i=0;i<C*nbEBands;i++)
1735 bandE[i] = 0;
1736 }
1737
1738#ifdef MEASURE_NORM_MSE
1739 measure_norm_mse(mode, X, X0, bandE, bandE0, M, N, C);
1740#endif
1741 if (anti_collapse_on)
1742 {
1743 anti_collapse(mode, X, collapse_masks, LM, C, N,
1744 st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
1745 }
1746
1747 /* Synthesis */
1748 denormalise_bands(mode, X, freq, bandE, st->start, effEnd, C, M);
1749
1750 c=0; do {
Nils Wallméniuse0884fe2012-12-01 21:11:50 +01001751 OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
Jean-Marc Valin69062102012-11-08 09:42:27 -05001752 } while (++c<CC);
1753
1754 if (CC==2&&C==1)
1755 {
1756 for (i=0;i<N;i++)
1757 freq[N+i] = freq[i];
1758 }
1759
1760 c=0; do {
1761 out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
1762 } while (++c<CC);
1763
1764 compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
1765
1766 c=0; do {
1767 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
1768 st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
1769 comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
1770 st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
1771 mode->window, st->overlap);
1772 if (LM!=0)
1773 comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
1774 st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
1775 mode->window, overlap);
1776 } while (++c<CC);
1777
1778 /* We reuse freq[] as scratch space for the de-emphasis */
1779 deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
1780 st->prefilter_period_old = st->prefilter_period;
1781 st->prefilter_gain_old = st->prefilter_gain;
1782 st->prefilter_tapset_old = st->prefilter_tapset;
1783 }
1784#endif
1785
1786 st->prefilter_period = pitch_index;
1787 st->prefilter_gain = gain1;
1788 st->prefilter_tapset = prefilter_tapset;
1789#ifdef RESYNTH
1790 if (LM!=0)
1791 {
1792 st->prefilter_period_old = st->prefilter_period;
1793 st->prefilter_gain_old = st->prefilter_gain;
1794 st->prefilter_tapset_old = st->prefilter_tapset;
1795 }
1796#endif
1797
1798 if (CC==2&&C==1) {
1799 for (i=0;i<nbEBands;i++)
1800 oldBandE[nbEBands+i]=oldBandE[i];
1801 }
1802
1803 if (!isTransient)
1804 {
1805 for (i=0;i<CC*nbEBands;i++)
1806 oldLogE2[i] = oldLogE[i];
1807 for (i=0;i<CC*nbEBands;i++)
1808 oldLogE[i] = oldBandE[i];
1809 } else {
1810 for (i=0;i<CC*nbEBands;i++)
1811 oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
1812 }
1813 /* In case start or end were to change */
1814 c=0; do
1815 {
1816 for (i=0;i<st->start;i++)
1817 {
1818 oldBandE[c*nbEBands+i]=0;
1819 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
1820 }
1821 for (i=st->end;i<nbEBands;i++)
1822 {
1823 oldBandE[c*nbEBands+i]=0;
1824 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
1825 }
1826 } while (++c<CC);
1827
1828 if (isTransient)
1829 st->consec_transient++;
1830 else
1831 st->consec_transient=0;
1832 st->rng = enc->rng;
1833
1834 /* If there's any room left (can only happen for very high rates),
1835 it's already filled with zeros */
1836 ec_enc_done(enc);
1837
1838#ifdef CUSTOM_MODES
1839 if (st->signalling)
1840 nbCompressedBytes++;
1841#endif
1842
1843 RESTORE_STACK;
1844 if (ec_get_error(enc))
1845 return OPUS_INTERNAL_ERROR;
1846 else
1847 return nbCompressedBytes;
1848}
1849
1850
1851#ifdef CUSTOM_MODES
1852
1853#ifdef FIXED_POINT
1854int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
1855{
1856 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
1857}
1858
1859#ifndef DISABLE_FLOAT_API
1860int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
1861{
1862 int j, ret, C, N;
1863 VARDECL(opus_int16, in);
1864 ALLOC_STACK;
1865
1866 if (pcm==NULL)
1867 return OPUS_BAD_ARG;
1868
1869 C = st->channels;
1870 N = frame_size;
1871 ALLOC(in, C*N, opus_int16);
1872
1873 for (j=0;j<C*N;j++)
1874 in[j] = FLOAT2INT16(pcm[j]);
1875
1876 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
1877#ifdef RESYNTH
1878 for (j=0;j<C*N;j++)
1879 ((float*)pcm)[j]=in[j]*(1.f/32768.f);
1880#endif
1881 RESTORE_STACK;
1882 return ret;
1883}
1884#endif /* DISABLE_FLOAT_API */
1885#else
1886
1887int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
1888{
1889 int j, ret, C, N;
1890 VARDECL(celt_sig, in);
1891 ALLOC_STACK;
1892
1893 if (pcm==NULL)
1894 return OPUS_BAD_ARG;
1895
1896 C=st->channels;
1897 N=frame_size;
1898 ALLOC(in, C*N, celt_sig);
1899 for (j=0;j<C*N;j++) {
1900 in[j] = SCALEOUT(pcm[j]);
1901 }
1902
1903 ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
1904#ifdef RESYNTH
1905 for (j=0;j<C*N;j++)
1906 ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
1907#endif
1908 RESTORE_STACK;
1909 return ret;
1910}
1911
1912int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
1913{
1914 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
1915}
1916
1917#endif
1918
1919#endif /* CUSTOM_MODES */
1920
1921int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
1922{
1923 va_list ap;
1924
1925 va_start(ap, request);
1926 switch (request)
1927 {
1928 case OPUS_SET_COMPLEXITY_REQUEST:
1929 {
1930 int value = va_arg(ap, opus_int32);
1931 if (value<0 || value>10)
1932 goto bad_arg;
1933 st->complexity = value;
1934 }
1935 break;
1936 case CELT_SET_START_BAND_REQUEST:
1937 {
1938 opus_int32 value = va_arg(ap, opus_int32);
1939 if (value<0 || value>=st->mode->nbEBands)
1940 goto bad_arg;
1941 st->start = value;
1942 }
1943 break;
1944 case CELT_SET_END_BAND_REQUEST:
1945 {
1946 opus_int32 value = va_arg(ap, opus_int32);
1947 if (value<1 || value>st->mode->nbEBands)
1948 goto bad_arg;
1949 st->end = value;
1950 }
1951 break;
1952 case CELT_SET_PREDICTION_REQUEST:
1953 {
1954 int value = va_arg(ap, opus_int32);
1955 if (value<0 || value>2)
1956 goto bad_arg;
1957 st->disable_pf = value<=1;
1958 st->force_intra = value==0;
1959 }
1960 break;
1961 case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
1962 {
1963 int value = va_arg(ap, opus_int32);
1964 if (value<0 || value>100)
1965 goto bad_arg;
1966 st->loss_rate = value;
1967 }
1968 break;
1969 case OPUS_SET_VBR_CONSTRAINT_REQUEST:
1970 {
1971 opus_int32 value = va_arg(ap, opus_int32);
1972 st->constrained_vbr = value;
1973 }
1974 break;
1975 case OPUS_SET_VBR_REQUEST:
1976 {
1977 opus_int32 value = va_arg(ap, opus_int32);
1978 st->vbr = value;
1979 }
1980 break;
1981 case OPUS_SET_BITRATE_REQUEST:
1982 {
1983 opus_int32 value = va_arg(ap, opus_int32);
1984 if (value<=500 && value!=OPUS_BITRATE_MAX)
1985 goto bad_arg;
1986 value = IMIN(value, 260000*st->channels);
1987 st->bitrate = value;
1988 }
1989 break;
1990 case CELT_SET_CHANNELS_REQUEST:
1991 {
1992 opus_int32 value = va_arg(ap, opus_int32);
1993 if (value<1 || value>2)
1994 goto bad_arg;
1995 st->stream_channels = value;
1996 }
1997 break;
1998 case OPUS_SET_LSB_DEPTH_REQUEST:
1999 {
2000 opus_int32 value = va_arg(ap, opus_int32);
2001 if (value<8 || value>24)
2002 goto bad_arg;
2003 st->lsb_depth=value;
2004 }
2005 break;
2006 case OPUS_GET_LSB_DEPTH_REQUEST:
2007 {
2008 opus_int32 *value = va_arg(ap, opus_int32*);
2009 *value=st->lsb_depth;
2010 }
2011 break;
2012 case OPUS_RESET_STATE:
2013 {
2014 int i;
2015 opus_val16 *oldBandE, *oldLogE, *oldLogE2;
2016 oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD));
2017 oldLogE = oldBandE + st->channels*st->mode->nbEBands;
2018 oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
2019 OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
2020 opus_custom_encoder_get_size(st->mode, st->channels)-
2021 ((char*)&st->ENCODER_RESET_START - (char*)st));
2022 for (i=0;i<st->channels*st->mode->nbEBands;i++)
2023 oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
2024 st->vbr_offset = 0;
2025 st->delayedIntra = 1;
2026 st->spread_decision = SPREAD_NORMAL;
2027 st->tonal_average = 256;
2028 st->hf_average = 0;
2029 st->tapset_decision = 0;
2030 }
2031 break;
2032#ifdef CUSTOM_MODES
2033 case CELT_SET_INPUT_CLIPPING_REQUEST:
2034 {
2035 opus_int32 value = va_arg(ap, opus_int32);
2036 st->clip = value;
2037 }
2038 break;
2039#endif
2040 case CELT_SET_SIGNALLING_REQUEST:
2041 {
2042 opus_int32 value = va_arg(ap, opus_int32);
2043 st->signalling = value;
2044 }
2045 break;
2046 case CELT_SET_ANALYSIS_REQUEST:
2047 {
2048 AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
2049 if (info)
2050 OPUS_COPY(&st->analysis, info, 1);
2051 }
2052 break;
2053 case CELT_GET_MODE_REQUEST:
2054 {
2055 const CELTMode ** value = va_arg(ap, const CELTMode**);
2056 if (value==0)
2057 goto bad_arg;
2058 *value=st->mode;
2059 }
2060 break;
2061 case OPUS_GET_FINAL_RANGE_REQUEST:
2062 {
2063 opus_uint32 * value = va_arg(ap, opus_uint32 *);
2064 if (value==0)
2065 goto bad_arg;
2066 *value=st->rng;
2067 }
2068 break;
2069 default:
2070 goto bad_request;
2071 }
2072 va_end(ap);
2073 return OPUS_OK;
2074bad_arg:
2075 va_end(ap);
2076 return OPUS_BAD_ARG;
2077bad_request:
2078 va_end(ap);
2079 return OPUS_UNIMPLEMENTED;
2080}