blob: 9130a41609baa781ca1a6633a56e2c0a2659fbe1 [file] [log] [blame]
Vikas Aroraa2415722012-08-09 16:18:58 -07001// Copyright 2011 Google Inc. All Rights Reserved.
Vikas Arora7c970a02011-06-16 15:56:45 +05302//
Vikas Arora0406ce12013-08-09 15:57:12 -07003// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
Vikas Arora7c970a02011-06-16 15:56:45 +05308// -----------------------------------------------------------------------------
9//
10// Quantization
11//
12// Author: Skal (pascal.massimino@gmail.com)
13
14#include <assert.h>
15#include <math.h>
Vikas Arora8b720222014-01-02 16:48:02 -080016#include <stdlib.h> // for abs()
Vikas Arora7c970a02011-06-16 15:56:45 +053017
Vikas Aroraa2415722012-08-09 16:18:58 -070018#include "./vp8enci.h"
19#include "./cost.h"
Vikas Arora7c970a02011-06-16 15:56:45 +053020
21#define DO_TRELLIS_I4 1
22#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
23#define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.
24#define USE_TDISTO 1
25
26#define MID_ALPHA 64 // neutral value for susceptibility
27#define MIN_ALPHA 30 // lowest usable value for susceptibility
Vikas Arora8b720222014-01-02 16:48:02 -080028#define MAX_ALPHA 100 // higher meaningful value for susceptibility
Vikas Arora7c970a02011-06-16 15:56:45 +053029
30#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
31 // power-law modulation. Must be strictly less than 1.
32
Vikas Arora1e7bf882013-03-13 16:43:18 -070033#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
34
Vikas Arora8b720222014-01-02 16:48:02 -080035// number of non-zero coeffs below which we consider the block very flat
36// (and apply a penalty to complex predictions)
37#define FLATNESS_LIMIT_I16 10 // I16 mode
38#define FLATNESS_LIMIT_I4 3 // I4 mode
39#define FLATNESS_LIMIT_UV 2 // UV mode
40#define FLATNESS_PENALTY 140 // roughly ~1bit per block
41
Vikas Arora7c970a02011-06-16 15:56:45 +053042#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
43
Vikas Arora8b720222014-01-02 16:48:02 -080044// #define DEBUG_BLOCK
45
46//------------------------------------------------------------------------------
47
48#if defined(DEBUG_BLOCK)
49
50#include <stdio.h>
51#include <stdlib.h>
52
53static void PrintBlockInfo(const VP8EncIterator* const it,
54 const VP8ModeScore* const rd) {
55 int i, j;
56 const int is_i16 = (it->mb_->type_ == 1);
57 printf("SOURCE / OUTPUT / ABS DELTA\n");
58 for (j = 0; j < 24; ++j) {
59 if (j == 16) printf("\n"); // newline before the U/V block
60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
61 printf(" ");
62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
63 printf(" ");
64 for (i = 0; i < 16; ++i) {
65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
66 }
67 printf("\n");
68 }
69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
71 (int)rd->score);
72 if (is_i16) {
73 printf("Mode: %d\n", rd->mode_i16);
74 printf("y_dc_levels:");
75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
76 printf("\n");
77 } else {
78 printf("Modes[16]: ");
79 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
80 printf("\n");
81 }
82 printf("y_ac_levels:\n");
83 for (j = 0; j < 16; ++j) {
84 for (i = is_i16 ? 1 : 0; i < 16; ++i) {
85 printf("%4d ", rd->y_ac_levels[j][i]);
86 }
87 printf("\n");
88 }
89 printf("\n");
90 printf("uv_levels (mode=%d):\n", rd->mode_uv);
91 for (j = 0; j < 8; ++j) {
92 for (i = 0; i < 16; ++i) {
93 printf("%4d ", rd->uv_levels[j][i]);
94 }
95 printf("\n");
96 }
97}
98
99#endif // DEBUG_BLOCK
Vikas Arora7c970a02011-06-16 15:56:45 +0530100
Vikas Aroraa2415722012-08-09 16:18:58 -0700101//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530102
Vikas Aroraa2415722012-08-09 16:18:58 -0700103static WEBP_INLINE int clip(int v, int m, int M) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530104 return v < m ? m : v > M ? M : v;
105}
106
Vikas Aroraa2415722012-08-09 16:18:58 -0700107static const uint8_t kZigzag[16] = {
Vikas Arora7c970a02011-06-16 15:56:45 +0530108 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
109};
110
111static const uint8_t kDcTable[128] = {
112 4, 5, 6, 7, 8, 9, 10, 10,
113 11, 12, 13, 14, 15, 16, 17, 17,
114 18, 19, 20, 20, 21, 21, 22, 22,
115 23, 23, 24, 25, 25, 26, 27, 28,
116 29, 30, 31, 32, 33, 34, 35, 36,
117 37, 37, 38, 39, 40, 41, 42, 43,
118 44, 45, 46, 46, 47, 48, 49, 50,
119 51, 52, 53, 54, 55, 56, 57, 58,
120 59, 60, 61, 62, 63, 64, 65, 66,
121 67, 68, 69, 70, 71, 72, 73, 74,
122 75, 76, 76, 77, 78, 79, 80, 81,
123 82, 83, 84, 85, 86, 87, 88, 89,
124 91, 93, 95, 96, 98, 100, 101, 102,
125 104, 106, 108, 110, 112, 114, 116, 118,
126 122, 124, 126, 128, 130, 132, 134, 136,
127 138, 140, 143, 145, 148, 151, 154, 157
128};
129
130static const uint16_t kAcTable[128] = {
131 4, 5, 6, 7, 8, 9, 10, 11,
132 12, 13, 14, 15, 16, 17, 18, 19,
133 20, 21, 22, 23, 24, 25, 26, 27,
134 28, 29, 30, 31, 32, 33, 34, 35,
135 36, 37, 38, 39, 40, 41, 42, 43,
136 44, 45, 46, 47, 48, 49, 50, 51,
137 52, 53, 54, 55, 56, 57, 58, 60,
138 62, 64, 66, 68, 70, 72, 74, 76,
139 78, 80, 82, 84, 86, 88, 90, 92,
140 94, 96, 98, 100, 102, 104, 106, 108,
141 110, 112, 114, 116, 119, 122, 125, 128,
142 131, 134, 137, 140, 143, 146, 149, 152,
143 155, 158, 161, 164, 167, 170, 173, 177,
144 181, 185, 189, 193, 197, 201, 205, 209,
145 213, 217, 221, 225, 229, 234, 239, 245,
146 249, 254, 259, 264, 269, 274, 279, 284
147};
148
149static const uint16_t kAcTable2[128] = {
150 8, 8, 9, 10, 12, 13, 15, 17,
151 18, 20, 21, 23, 24, 26, 27, 29,
152 31, 32, 34, 35, 37, 38, 40, 41,
153 43, 44, 46, 48, 49, 51, 52, 54,
154 55, 57, 58, 60, 62, 63, 65, 66,
155 68, 69, 71, 72, 74, 75, 77, 79,
156 80, 82, 83, 85, 86, 88, 89, 93,
157 96, 99, 102, 105, 108, 111, 114, 117,
158 120, 124, 127, 130, 133, 136, 139, 142,
159 145, 148, 151, 155, 158, 161, 164, 167,
160 170, 173, 176, 179, 184, 189, 193, 198,
161 203, 207, 212, 217, 221, 226, 230, 235,
162 240, 244, 249, 254, 258, 263, 268, 274,
163 280, 286, 292, 299, 305, 311, 317, 323,
164 330, 336, 342, 348, 354, 362, 370, 379,
165 385, 393, 401, 409, 416, 424, 432, 440
166};
167
Vikas Arora8b720222014-01-02 16:48:02 -0800168static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]
169 { 96, 110 }, { 96, 108 }, { 110, 115 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530170};
171
Vikas Arora8b720222014-01-02 16:48:02 -0800172// Sharpening by (slightly) raising the hi-frequency coeffs.
Vikas Arora7c970a02011-06-16 15:56:45 +0530173// Hack-ish but helpful for mid-bitrate range. Use with care.
Vikas Arora8b720222014-01-02 16:48:02 -0800174#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias
Vikas Arora7c970a02011-06-16 15:56:45 +0530175static const uint8_t kFreqSharpening[16] = {
176 0, 30, 60, 90,
177 30, 60, 90, 90,
178 60, 90, 90, 90,
179 90, 90, 90, 90
180};
181
Vikas Aroraa2415722012-08-09 16:18:58 -0700182//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530183// Initialize quantization parameters in VP8Matrix
184
185// Returns the average quantizer
186static int ExpandMatrix(VP8Matrix* const m, int type) {
Vikas Arora8b720222014-01-02 16:48:02 -0800187 int i, sum;
188 for (i = 0; i < 2; ++i) {
189 const int is_ac_coeff = (i > 0);
190 const int bias = kBiasMatrices[type][is_ac_coeff];
191 m->iq_[i] = (1 << QFIX) / m->q_[i];
192 m->bias_[i] = BIAS(bias);
193 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
194 // * zero if coeff <= zthresh
195 // * non-zero if coeff > zthresh
196 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
197 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530198 for (i = 2; i < 16; ++i) {
199 m->q_[i] = m->q_[1];
Vikas Arora8b720222014-01-02 16:48:02 -0800200 m->iq_[i] = m->iq_[1];
201 m->bias_[i] = m->bias_[1];
202 m->zthresh_[i] = m->zthresh_[1];
Vikas Arora7c970a02011-06-16 15:56:45 +0530203 }
Vikas Arora8b720222014-01-02 16:48:02 -0800204 for (sum = 0, i = 0; i < 16; ++i) {
205 if (type == 0) { // we only use sharpening for AC luma coeffs
206 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
207 } else {
208 m->sharpen_[i] = 0;
209 }
210 sum += m->q_[i];
Vikas Arora7c970a02011-06-16 15:56:45 +0530211 }
212 return (sum + 8) >> 4;
213}
214
215static void SetupMatrices(VP8Encoder* enc) {
216 int i;
217 const int tlambda_scale =
218 (enc->method_ >= 4) ? enc->config_->sns_strength
219 : 0;
220 const int num_segments = enc->segment_hdr_.num_segments_;
221 for (i = 0; i < num_segments; ++i) {
222 VP8SegmentInfo* const m = &enc->dqm_[i];
223 const int q = m->quant_;
224 int q4, q16, quv;
225 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
226 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
227
228 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
229 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
230
231 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
232 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
233
234 q4 = ExpandMatrix(&m->y1_, 0);
235 q16 = ExpandMatrix(&m->y2_, 1);
236 quv = ExpandMatrix(&m->uv_, 2);
237
Vikas Arora8b720222014-01-02 16:48:02 -0800238 m->lambda_i4_ = (3 * q4 * q4) >> 7;
239 m->lambda_i16_ = (3 * q16 * q16);
240 m->lambda_uv_ = (3 * quv * quv) >> 6;
241 m->lambda_mode_ = (1 * q4 * q4) >> 7;
242 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
243 m->lambda_trellis_i16_ = (q16 * q16) >> 2;
244 m->lambda_trellis_uv_ = (quv *quv) << 1;
245 m->tlambda_ = (tlambda_scale * q4) >> 5;
246
247 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto
248 m->max_edge_ = 0;
Vikas Arora7c970a02011-06-16 15:56:45 +0530249 }
250}
251
Vikas Aroraa2415722012-08-09 16:18:58 -0700252//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530253// Initialize filtering parameters
254
255// Very small filter-strength values have close to no visual effect. So we can
256// save a little decoding-CPU by turning filtering off for these.
Vikas Arora8b720222014-01-02 16:48:02 -0800257#define FSTRENGTH_CUTOFF 2
Vikas Arora7c970a02011-06-16 15:56:45 +0530258
259static void SetupFilterStrength(VP8Encoder* const enc) {
260 int i;
Vikas Arora8b720222014-01-02 16:48:02 -0800261 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
262 const int level0 = 5 * enc->config_->filter_strength;
Vikas Arora7c970a02011-06-16 15:56:45 +0530263 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
Vikas Arora8b720222014-01-02 16:48:02 -0800264 VP8SegmentInfo* const m = &enc->dqm_[i];
265 // We focus on the quantization of AC coeffs.
266 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
267 const int base_strength =
268 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
269 // Segments with lower complexity ('beta') will be less filtered.
270 const int f = base_strength * level0 / (256 + m->beta_);
271 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
Vikas Arora7c970a02011-06-16 15:56:45 +0530272 }
273 // We record the initial strength (mainly for the case of 1-segment only).
274 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
275 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
276 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
277}
278
Vikas Aroraa2415722012-08-09 16:18:58 -0700279//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530280
281// Note: if you change the values below, remember that the max range
282// allowed by the syntax for DQ_UV is [-16,16].
283#define MAX_DQ_UV (6)
284#define MIN_DQ_UV (-4)
285
286// We want to emulate jpeg-like behaviour where the expected "good" quality
287// is around q=75. Internally, our "good" middle is around c=50. So we
288// map accordingly using linear piece-wise function
Vikas Arora1e7bf882013-03-13 16:43:18 -0700289static double QualityToCompression(double c) {
290 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
291 // The file size roughly scales as pow(quantizer, 3.). Actually, the
292 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
293 // in the mid-quant range. So we scale the compressibility inversely to
294 // this power-law: quant ~= compression ^ 1/3. This law holds well for
Vikas Arora8b720222014-01-02 16:48:02 -0800295 // low quant. Finer modeling for high-quant would make use of kAcTable[]
Vikas Arora1e7bf882013-03-13 16:43:18 -0700296 // more explicitly.
297 const double v = pow(linear_c, 1 / 3.);
298 return v;
299}
300
301static double QualityToJPEGCompression(double c, double alpha) {
302 // We map the complexity 'alpha' and quality setting 'c' to a compression
303 // exponent empirically matched to the compression curve of libjpeg6b.
304 // On average, the WebP output size will be roughly similar to that of a
305 // JPEG file compressed with same quality factor.
306 const double amin = 0.30;
307 const double amax = 0.85;
308 const double exp_min = 0.4;
309 const double exp_max = 0.9;
310 const double slope = (exp_min - exp_max) / (amax - amin);
311 // Linearly interpolate 'expn' from exp_min to exp_max
312 // in the [amin, amax] range.
313 const double expn = (alpha > amax) ? exp_min
314 : (alpha < amin) ? exp_max
315 : exp_max + slope * (alpha - amin);
316 const double v = pow(c, expn);
317 return v;
318}
319
320static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
321 const VP8SegmentInfo* const S2) {
322 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
323}
324
325static void SimplifySegments(VP8Encoder* const enc) {
326 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
327 const int num_segments = enc->segment_hdr_.num_segments_;
328 int num_final_segments = 1;
329 int s1, s2;
330 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
331 const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
332 int found = 0;
333 // check if we already have similar segment
334 for (s2 = 0; s2 < num_final_segments; ++s2) {
335 const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
336 if (SegmentsAreEquivalent(S1, S2)) {
337 found = 1;
338 break;
339 }
340 }
341 map[s1] = s2;
342 if (!found) {
343 if (num_final_segments != s1) {
344 enc->dqm_[num_final_segments] = enc->dqm_[s1];
345 }
346 ++num_final_segments;
347 }
348 }
349 if (num_final_segments < num_segments) { // Remap
350 int i = enc->mb_w_ * enc->mb_h_;
351 while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
352 enc->segment_hdr_.num_segments_ = num_final_segments;
353 // Replicate the trailing segment infos (it's mostly cosmetics)
354 for (i = num_final_segments; i < num_segments; ++i) {
355 enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
356 }
357 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530358}
359
360void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
361 int i;
362 int dq_uv_ac, dq_uv_dc;
Vikas Arora1e7bf882013-03-13 16:43:18 -0700363 const int num_segments = enc->segment_hdr_.num_segments_;
Vikas Arora7c970a02011-06-16 15:56:45 +0530364 const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
Vikas Arora1e7bf882013-03-13 16:43:18 -0700365 const double Q = quality / 100.;
366 const double c_base = enc->config_->emulate_jpeg_size ?
367 QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
368 QualityToCompression(Q);
Vikas Arora7c970a02011-06-16 15:56:45 +0530369 for (i = 0; i < num_segments; ++i) {
Vikas Arora1e7bf882013-03-13 16:43:18 -0700370 // We modulate the base coefficient to accommodate for the quantization
371 // susceptibility and allow denser segments to be quantized more.
372 const double expn = 1. - amp * enc->dqm_[i].alpha_;
Vikas Arora7c970a02011-06-16 15:56:45 +0530373 const double c = pow(c_base, expn);
374 const int q = (int)(127. * (1. - c));
375 assert(expn > 0.);
376 enc->dqm_[i].quant_ = clip(q, 0, 127);
377 }
378
379 // purely indicative in the bitstream (except for the 1-segment case)
380 enc->base_quant_ = enc->dqm_[0].quant_;
381
382 // fill-in values for the unused segments (required by the syntax)
383 for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
384 enc->dqm_[i].quant_ = enc->base_quant_;
385 }
386
387 // uv_alpha_ is normally spread around ~60. The useful range is
388 // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
389 // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
390 dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
391 / (MAX_ALPHA - MIN_ALPHA);
392 // we rescale by the user-defined strength of adaptation
393 dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
394 // and make it safe.
395 dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
396 // We also boost the dc-uv-quant a little, based on sns-strength, since
397 // U/V channels are quite more reactive to high quants (flat DC-blocks
Vikas Aroraaf51b942014-08-28 10:51:12 -0700398 // tend to appear, and are unpleasant).
Vikas Arora7c970a02011-06-16 15:56:45 +0530399 dq_uv_dc = -4 * enc->config_->sns_strength / 100;
400 dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
401
402 enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum
403 enc->dq_y2_dc_ = 0;
404 enc->dq_y2_ac_ = 0;
405 enc->dq_uv_dc_ = dq_uv_dc;
406 enc->dq_uv_ac_ = dq_uv_ac;
407
Vikas Arora7c970a02011-06-16 15:56:45 +0530408 SetupFilterStrength(enc); // initialize segments' filtering, eventually
Vikas Arora1e7bf882013-03-13 16:43:18 -0700409
410 if (num_segments > 1) SimplifySegments(enc);
411
412 SetupMatrices(enc); // finalize quantization matrices
Vikas Arora7c970a02011-06-16 15:56:45 +0530413}
414
Vikas Aroraa2415722012-08-09 16:18:58 -0700415//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530416// Form the predictions in cache
417
418// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
419const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
420const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
421
422// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
423const int VP8I4ModeOffsets[NUM_BMODES] = {
424 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
425};
426
427void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
Vikas Arora8b720222014-01-02 16:48:02 -0800428 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
429 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
Vikas Arora7c970a02011-06-16 15:56:45 +0530430 VP8EncPredLuma16(it->yuv_p_, left, top);
431}
432
433void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
Vikas Arora8b720222014-01-02 16:48:02 -0800434 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
435 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
Vikas Arora7c970a02011-06-16 15:56:45 +0530436 VP8EncPredChroma8(it->yuv_p_, left, top);
437}
438
439void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
441}
442
Vikas Aroraa2415722012-08-09 16:18:58 -0700443//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530444// Quantize
445
446// Layout:
447// +----+
448// |YYYY| 0
449// |YYYY| 4
450// |YYYY| 8
451// |YYYY| 12
452// +----+
453// |UUVV| 16
454// |UUVV| 20
455// +----+
456
Vikas Aroraaf51b942014-08-28 10:51:12 -0700457const int VP8Scan[16] = { // Luma
Vikas Arora7c970a02011-06-16 15:56:45 +0530458 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
459 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
460 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
461 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
Vikas Aroraaf51b942014-08-28 10:51:12 -0700462};
Vikas Arora7c970a02011-06-16 15:56:45 +0530463
Vikas Aroraaf51b942014-08-28 10:51:12 -0700464static const int VP8ScanUV[4 + 4] = {
Vikas Arora7c970a02011-06-16 15:56:45 +0530465 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
466 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
467};
468
Vikas Aroraa2415722012-08-09 16:18:58 -0700469//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530470// Distortion measurement
471
472static const uint16_t kWeightY[16] = {
473 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
474};
475
476static const uint16_t kWeightTrellis[16] = {
477#if USE_TDISTO == 0
478 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
479#else
480 30, 27, 19, 11,
481 27, 24, 17, 10,
482 19, 17, 12, 8,
483 11, 10, 8, 6
484#endif
485};
486
487// Init/Copy the common fields in score.
488static void InitScore(VP8ModeScore* const rd) {
489 rd->D = 0;
490 rd->SD = 0;
491 rd->R = 0;
Vikas Arora8b720222014-01-02 16:48:02 -0800492 rd->H = 0;
Vikas Arora7c970a02011-06-16 15:56:45 +0530493 rd->nz = 0;
494 rd->score = MAX_COST;
495}
496
497static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
498 dst->D = src->D;
499 dst->SD = src->SD;
500 dst->R = src->R;
Vikas Arora8b720222014-01-02 16:48:02 -0800501 dst->H = src->H;
Vikas Arora7c970a02011-06-16 15:56:45 +0530502 dst->nz = src->nz; // note that nz is not accumulated, but just copied.
503 dst->score = src->score;
504}
505
506static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
507 dst->D += src->D;
508 dst->SD += src->SD;
509 dst->R += src->R;
Vikas Arora8b720222014-01-02 16:48:02 -0800510 dst->H += src->H;
Vikas Arora7c970a02011-06-16 15:56:45 +0530511 dst->nz |= src->nz; // here, new nz bits are accumulated.
512 dst->score += src->score;
513}
514
Vikas Aroraa2415722012-08-09 16:18:58 -0700515//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530516// Performs trellis-optimized quantization.
517
Vikas Aroraaf51b942014-08-28 10:51:12 -0700518// Trellis node
Vikas Arora7c970a02011-06-16 15:56:45 +0530519typedef struct {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700520 int8_t prev; // best previous node
521 int8_t sign; // sign of coeff_i
522 int16_t level; // level
Vikas Arora7c970a02011-06-16 15:56:45 +0530523} Node;
524
Vikas Aroraaf51b942014-08-28 10:51:12 -0700525// Score state
526typedef struct {
527 score_t score; // partial RD score
528 const uint16_t* costs; // shortcut to cost tables
529} ScoreState;
530
Vikas Arora7c970a02011-06-16 15:56:45 +0530531// If a coefficient was quantized to a value Q (using a neutral bias),
532// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
533// We don't test negative values though.
534#define MIN_DELTA 0 // how much lower level to try
535#define MAX_DELTA 1 // how much higher
536#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
Vikas Aroraaf51b942014-08-28 10:51:12 -0700537#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
538#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
Vikas Arora7c970a02011-06-16 15:56:45 +0530539
Vikas Aroraa2415722012-08-09 16:18:58 -0700540static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530541 // TODO: incorporate the "* 256" in the tables?
Vikas Arora8b720222014-01-02 16:48:02 -0800542 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
Vikas Arora7c970a02011-06-16 15:56:45 +0530543}
544
Vikas Aroraa2415722012-08-09 16:18:58 -0700545static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
546 score_t distortion) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530547 return rate * lambda + 256 * distortion;
548}
549
Vikas Aroraaf51b942014-08-28 10:51:12 -0700550static int TrellisQuantizeBlock(const VP8Encoder* const enc,
Vikas Arora7c970a02011-06-16 15:56:45 +0530551 int16_t in[16], int16_t out[16],
552 int ctx0, int coeff_type,
553 const VP8Matrix* const mtx,
554 int lambda) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700555 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
556 const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
Vikas Arora7c970a02011-06-16 15:56:45 +0530557 const int first = (coeff_type == 0) ? 1 : 0;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700558 Node nodes[16][NUM_NODES];
559 ScoreState score_states[2][NUM_NODES];
560 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
561 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
Vikas Arora7c970a02011-06-16 15:56:45 +0530562 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
563 score_t best_score;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700564 int n, m, p, last;
Vikas Arora7c970a02011-06-16 15:56:45 +0530565
566 {
567 score_t cost;
Vikas Arora7c970a02011-06-16 15:56:45 +0530568 const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700569 const int last_proba = probas[VP8EncBands[first]][ctx0][0];
Vikas Arora7c970a02011-06-16 15:56:45 +0530570
Vikas Aroraaf51b942014-08-28 10:51:12 -0700571 // compute the position of the last interesting coefficient
572 last = first - 1;
573 for (n = 15; n >= first; --n) {
574 const int j = kZigzag[n];
Vikas Arora7c970a02011-06-16 15:56:45 +0530575 const int err = in[j] * in[j];
Vikas Aroraaf51b942014-08-28 10:51:12 -0700576 if (err > thresh) {
577 last = n;
578 break;
579 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530580 }
581 // we don't need to go inspect up to n = 16 coeffs. We can just go up
582 // to last + 1 (inclusive) without losing much.
583 if (last < 15) ++last;
584
585 // compute 'skip' score. This is the max score one can do.
586 cost = VP8BitCost(0, last_proba);
Vikas Aroraaf51b942014-08-28 10:51:12 -0700587 best_score = RDScoreTrellis(lambda, cost, 0);
Vikas Arora7c970a02011-06-16 15:56:45 +0530588
589 // initialize source node.
Vikas Arora7c970a02011-06-16 15:56:45 +0530590 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700591 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
592 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
593 ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
Vikas Arora7c970a02011-06-16 15:56:45 +0530594 }
595 }
596
597 // traverse trellis.
598 for (n = first; n <= last; ++n) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700599 const int j = kZigzag[n];
600 const uint32_t Q = mtx->q_[j];
601 const uint32_t iQ = mtx->iq_[j];
602 const uint32_t B = BIAS(0x00); // neutral bias
Vikas Arora7c970a02011-06-16 15:56:45 +0530603 // note: it's important to take sign of the _original_ coeff,
604 // so we don't have to consider level < 0 afterward.
605 const int sign = (in[j] < 0);
Vikas Aroraaf51b942014-08-28 10:51:12 -0700606 const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
Vikas Arora8b720222014-01-02 16:48:02 -0800607 int level0 = QUANTDIV(coeff0, iQ, B);
608 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
Vikas Arora7c970a02011-06-16 15:56:45 +0530609
Vikas Aroraaf51b942014-08-28 10:51:12 -0700610 { // Swap current and previous score states
611 ScoreState* const tmp = ss_cur;
612 ss_cur = ss_prev;
613 ss_prev = tmp;
614 }
615
Vikas Arora7c970a02011-06-16 15:56:45 +0530616 // test all alternate level values around level0.
617 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
618 Node* const cur = &NODE(n, m);
Vikas Arora7c970a02011-06-16 15:56:45 +0530619 int level = level0 + m;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700620 const int ctx = (level > 2) ? 2 : level;
621 const int band = VP8EncBands[n + 1];
622 score_t base_score, last_pos_score;
623 score_t best_cur_score = MAX_COST;
624 int best_prev = 0; // default, in case
Vikas Arora7c970a02011-06-16 15:56:45 +0530625
Vikas Aroraaf51b942014-08-28 10:51:12 -0700626 ss_cur[m].score = MAX_COST;
627 ss_cur[m].costs = costs[band][ctx];
Vikas Arora8b720222014-01-02 16:48:02 -0800628 if (level > MAX_LEVEL || level < 0) { // node is dead?
Vikas Arora7c970a02011-06-16 15:56:45 +0530629 continue;
630 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530631
Vikas Aroraaf51b942014-08-28 10:51:12 -0700632 // Compute extra rate cost if last coeff's position is < 15
633 {
634 const score_t last_pos_cost =
635 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
636 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
637 }
638
639 {
640 // Compute delta_error = how much coding this level will
641 // subtract to max_error as distortion.
642 // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
643 const int new_error = coeff0 - level * Q;
644 const int delta_error =
645 kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
646 base_score = RDScoreTrellis(lambda, 0, delta_error);
647 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530648
649 // Inspect all possible non-dead predecessors. Retain only the best one.
650 for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700651 // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
652 // eliminated since their score can't be better than the current best.
653 const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
Vikas Arora7c970a02011-06-16 15:56:45 +0530654 // Examine node assuming it's a non-terminal one.
Vikas Aroraaf51b942014-08-28 10:51:12 -0700655 const score_t score =
656 base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
657 if (score < best_cur_score) {
658 best_cur_score = score;
659 best_prev = p;
Vikas Arora7c970a02011-06-16 15:56:45 +0530660 }
Vikas Aroraaf51b942014-08-28 10:51:12 -0700661 }
662 // Store best finding in current node.
663 cur->sign = sign;
664 cur->level = level;
665 cur->prev = best_prev;
666 ss_cur[m].score = best_cur_score;
Vikas Arora7c970a02011-06-16 15:56:45 +0530667
Vikas Aroraaf51b942014-08-28 10:51:12 -0700668 // Now, record best terminal node (and thus best entry in the graph).
669 if (level != 0) {
670 const score_t score = best_cur_score + last_pos_score;
671 if (score < best_score) {
672 best_score = score;
673 best_path[0] = n; // best eob position
674 best_path[1] = m; // best node index
675 best_path[2] = best_prev; // best predecessor
Vikas Arora7c970a02011-06-16 15:56:45 +0530676 }
677 }
678 }
679 }
680
681 // Fresh start
682 memset(in + first, 0, (16 - first) * sizeof(*in));
683 memset(out + first, 0, (16 - first) * sizeof(*out));
684 if (best_path[0] == -1) {
685 return 0; // skip!
686 }
687
Vikas Aroraaf51b942014-08-28 10:51:12 -0700688 {
689 // Unwind the best path.
690 // Note: best-prev on terminal node is not necessarily equal to the
691 // best_prev for non-terminal. So we patch best_path[2] in.
692 int nz = 0;
693 int best_node = best_path[1];
694 n = best_path[0];
695 NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
Vikas Arora7c970a02011-06-16 15:56:45 +0530696
Vikas Aroraaf51b942014-08-28 10:51:12 -0700697 for (; n >= first; --n) {
698 const Node* const node = &NODE(n, best_node);
699 const int j = kZigzag[n];
700 out[n] = node->sign ? -node->level : node->level;
701 nz |= node->level;
702 in[j] = out[n] * mtx->q_[j];
703 best_node = node->prev;
704 }
705 return (nz != 0);
Vikas Arora7c970a02011-06-16 15:56:45 +0530706 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530707}
708
709#undef NODE
710
Vikas Aroraa2415722012-08-09 16:18:58 -0700711//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530712// Performs: difference, transform, quantize, back-transform, add
713// all at once. Output is the reconstructed block in *yuv_out, and the
714// quantized levels in *levels.
715
716static int ReconstructIntra16(VP8EncIterator* const it,
717 VP8ModeScore* const rd,
718 uint8_t* const yuv_out,
719 int mode) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700720 const VP8Encoder* const enc = it->enc_;
Vikas Arora7c970a02011-06-16 15:56:45 +0530721 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
722 const uint8_t* const src = it->yuv_in_ + Y_OFF;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700723 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
Vikas Arora7c970a02011-06-16 15:56:45 +0530724 int nz = 0;
725 int n;
726 int16_t tmp[16][16], dc_tmp[16];
727
728 for (n = 0; n < 16; ++n) {
729 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
730 }
731 VP8FTransformWHT(tmp[0], dc_tmp);
Vikas Arora8b720222014-01-02 16:48:02 -0800732 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
Vikas Arora7c970a02011-06-16 15:56:45 +0530733
734 if (DO_TRELLIS_I16 && it->do_trellis_) {
735 int x, y;
736 VP8IteratorNzToBytes(it);
737 for (y = 0, n = 0; y < 4; ++y) {
738 for (x = 0; x < 4; ++x, ++n) {
739 const int ctx = it->top_nz_[x] + it->left_nz_[y];
740 const int non_zero =
Vikas Aroraaf51b942014-08-28 10:51:12 -0700741 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
742 &dqm->y1_, dqm->lambda_trellis_i16_);
Vikas Arora7c970a02011-06-16 15:56:45 +0530743 it->top_nz_[x] = it->left_nz_[y] = non_zero;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700744 rd->y_ac_levels[n][0] = 0;
Vikas Arora7c970a02011-06-16 15:56:45 +0530745 nz |= non_zero << n;
746 }
747 }
748 } else {
749 for (n = 0; n < 16; ++n) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700750 // Zero-out the first coeff, so that: a) nz is correct below, and
751 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
752 tmp[n][0] = 0;
753 nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
754 assert(rd->y_ac_levels[n][0] == 0);
Vikas Arora7c970a02011-06-16 15:56:45 +0530755 }
756 }
757
758 // Transform back
Vikas Aroraaf51b942014-08-28 10:51:12 -0700759 VP8TransformWHT(dc_tmp, tmp[0]);
Vikas Arora46672792011-07-13 16:37:55 +0530760 for (n = 0; n < 16; n += 2) {
761 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
Vikas Arora7c970a02011-06-16 15:56:45 +0530762 }
763
764 return nz;
765}
766
767static int ReconstructIntra4(VP8EncIterator* const it,
768 int16_t levels[16],
769 const uint8_t* const src,
770 uint8_t* const yuv_out,
771 int mode) {
772 const VP8Encoder* const enc = it->enc_;
773 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
774 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
775 int nz = 0;
776 int16_t tmp[16];
777
778 VP8FTransform(src, ref, tmp);
779 if (DO_TRELLIS_I4 && it->do_trellis_) {
780 const int x = it->i4_ & 3, y = it->i4_ >> 2;
781 const int ctx = it->top_nz_[x] + it->left_nz_[y];
Vikas Aroraaf51b942014-08-28 10:51:12 -0700782 nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
Vikas Arora7c970a02011-06-16 15:56:45 +0530783 dqm->lambda_trellis_i4_);
784 } else {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700785 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
Vikas Arora7c970a02011-06-16 15:56:45 +0530786 }
Vikas Arora46672792011-07-13 16:37:55 +0530787 VP8ITransform(ref, tmp, yuv_out, 0);
Vikas Arora7c970a02011-06-16 15:56:45 +0530788 return nz;
789}
790
791static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
792 uint8_t* const yuv_out, int mode) {
793 const VP8Encoder* const enc = it->enc_;
794 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
795 const uint8_t* const src = it->yuv_in_ + U_OFF;
796 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
797 int nz = 0;
798 int n;
799 int16_t tmp[8][16];
800
801 for (n = 0; n < 8; ++n) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700802 VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
Vikas Arora7c970a02011-06-16 15:56:45 +0530803 }
804 if (DO_TRELLIS_UV && it->do_trellis_) {
805 int ch, x, y;
806 for (ch = 0, n = 0; ch <= 2; ch += 2) {
807 for (y = 0; y < 2; ++y) {
808 for (x = 0; x < 2; ++x, ++n) {
809 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
810 const int non_zero =
Vikas Aroraaf51b942014-08-28 10:51:12 -0700811 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
812 &dqm->uv_, dqm->lambda_trellis_uv_);
Vikas Arora7c970a02011-06-16 15:56:45 +0530813 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
814 nz |= non_zero << n;
815 }
816 }
817 }
818 } else {
819 for (n = 0; n < 8; ++n) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700820 nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
Vikas Arora7c970a02011-06-16 15:56:45 +0530821 }
822 }
823
Vikas Arora46672792011-07-13 16:37:55 +0530824 for (n = 0; n < 8; n += 2) {
Vikas Aroraaf51b942014-08-28 10:51:12 -0700825 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
Vikas Arora7c970a02011-06-16 15:56:45 +0530826 }
827 return (nz << 16);
828}
829
Vikas Aroraa2415722012-08-09 16:18:58 -0700830//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530831// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
Vikas Arora8b720222014-01-02 16:48:02 -0800832// Pick the mode is lower RD-cost = Rate + lambda * Distortion.
833
834static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
835 // We look at the first three AC coefficients to determine what is the average
836 // delta between each sub-4x4 block.
837 const int v0 = abs(DCs[1]);
838 const int v1 = abs(DCs[4]);
839 const int v2 = abs(DCs[5]);
840 int max_v = (v0 > v1) ? v1 : v0;
841 max_v = (v2 > max_v) ? v2 : max_v;
842 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
843}
Vikas Arora7c970a02011-06-16 15:56:45 +0530844
845static void SwapPtr(uint8_t** a, uint8_t** b) {
846 uint8_t* const tmp = *a;
847 *a = *b;
848 *b = tmp;
849}
850
851static void SwapOut(VP8EncIterator* const it) {
852 SwapPtr(&it->yuv_out_, &it->yuv_out2_);
853}
854
Vikas Arora8b720222014-01-02 16:48:02 -0800855static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
856 score_t score = 0;
857 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
858 int i;
859 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
860 score += (levels[i] != 0);
861 if (score > thresh) return 0;
862 }
863 levels += 16;
864 }
865 return 1;
866}
867
Vikas Arora7c970a02011-06-16 15:56:45 +0530868static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
Vikas Arora8b720222014-01-02 16:48:02 -0800869 const int kNumBlocks = 16;
Vikas Aroraaf51b942014-08-28 10:51:12 -0700870 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
Vikas Arora7c970a02011-06-16 15:56:45 +0530871 const int lambda = dqm->lambda_i16_;
872 const int tlambda = dqm->tlambda_;
873 const uint8_t* const src = it->yuv_in_ + Y_OFF;
874 VP8ModeScore rd16;
875 int mode;
876
877 rd->mode_i16 = -1;
Vikas Arora1e7bf882013-03-13 16:43:18 -0700878 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530879 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
880 int nz;
881
882 // Reconstruct
883 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
884
885 // Measure RD-score
886 rd16.D = VP8SSE16x16(src, tmp_dst);
887 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
888 : 0;
Vikas Arora8b720222014-01-02 16:48:02 -0800889 rd16.H = VP8FixedCostsI16[mode];
Vikas Arora7c970a02011-06-16 15:56:45 +0530890 rd16.R = VP8GetCostLuma16(it, &rd16);
Vikas Arora8b720222014-01-02 16:48:02 -0800891 if (mode > 0 &&
892 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
893 // penalty to avoid flat area to be mispredicted by complex mode
894 rd16.R += FLATNESS_PENALTY * kNumBlocks;
895 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530896
897 // Since we always examine Intra16 first, we can overwrite *rd directly.
898 SetRDScore(lambda, &rd16);
899 if (mode == 0 || rd16.score < rd->score) {
900 CopyScore(rd, &rd16);
901 rd->mode_i16 = mode;
902 rd->nz = nz;
903 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
904 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
905 SwapOut(it);
906 }
907 }
908 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
909 VP8SetIntra16Mode(it, rd->mode_i16);
Vikas Arora8b720222014-01-02 16:48:02 -0800910
911 // we have a blocky macroblock (only DCs are non-zero) with fairly high
912 // distortion, record max delta so we can later adjust the minimal filtering
913 // strength needed to smooth these blocks out.
914 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
915 StoreMaxDelta(dqm, rd->y_dc_levels);
916 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530917}
918
Vikas Aroraa2415722012-08-09 16:18:58 -0700919//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +0530920
921// return the cost array corresponding to the surrounding prediction modes.
922static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
Vikas Aroraa2415722012-08-09 16:18:58 -0700923 const uint8_t modes[16]) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530924 const int preds_w = it->enc_->preds_w_;
925 const int x = (it->i4_ & 3), y = it->i4_ >> 2;
926 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
927 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
928 return VP8FixedCostsI4[top][left];
929}
930
931static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
Vikas Aroraa2415722012-08-09 16:18:58 -0700932 const VP8Encoder* const enc = it->enc_;
Vikas Arora7c970a02011-06-16 15:56:45 +0530933 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
934 const int lambda = dqm->lambda_i4_;
935 const int tlambda = dqm->tlambda_;
936 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
937 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
Vikas Aroraa2415722012-08-09 16:18:58 -0700938 int total_header_bits = 0;
Vikas Arora7c970a02011-06-16 15:56:45 +0530939 VP8ModeScore rd_best;
940
Vikas Aroraa2415722012-08-09 16:18:58 -0700941 if (enc->max_i4_header_bits_ == 0) {
942 return 0;
943 }
944
Vikas Arora7c970a02011-06-16 15:56:45 +0530945 InitScore(&rd_best);
Vikas Arora8b720222014-01-02 16:48:02 -0800946 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
947 SetRDScore(dqm->lambda_mode_, &rd_best);
Vikas Arora7c970a02011-06-16 15:56:45 +0530948 VP8IteratorStartI4(it);
949 do {
Vikas Arora8b720222014-01-02 16:48:02 -0800950 const int kNumBlocks = 1;
Vikas Arora7c970a02011-06-16 15:56:45 +0530951 VP8ModeScore rd_i4;
952 int mode;
953 int best_mode = -1;
954 const uint8_t* const src = src0 + VP8Scan[it->i4_];
955 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
956 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
957 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
958
959 InitScore(&rd_i4);
960 VP8MakeIntra4Preds(it);
961 for (mode = 0; mode < NUM_BMODES; ++mode) {
962 VP8ModeScore rd_tmp;
963 int16_t tmp_levels[16];
964
965 // Reconstruct
966 rd_tmp.nz =
967 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
968
969 // Compute RD-score
970 rd_tmp.D = VP8SSE4x4(src, tmp_dst);
971 rd_tmp.SD =
972 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
973 : 0;
Vikas Arora8b720222014-01-02 16:48:02 -0800974 rd_tmp.H = mode_costs[mode];
Vikas Arora7c970a02011-06-16 15:56:45 +0530975 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
Vikas Arora8b720222014-01-02 16:48:02 -0800976 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
977 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
978 }
Vikas Arora7c970a02011-06-16 15:56:45 +0530979
980 SetRDScore(lambda, &rd_tmp);
981 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
982 CopyScore(&rd_i4, &rd_tmp);
983 best_mode = mode;
984 SwapPtr(&tmp_dst, &best_block);
985 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
986 }
987 }
988 SetRDScore(dqm->lambda_mode_, &rd_i4);
989 AddScore(&rd_best, &rd_i4);
Vikas Arora8b720222014-01-02 16:48:02 -0800990 if (rd_best.score >= rd->score) {
991 return 0;
992 }
993 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
994 if (total_header_bits > enc->max_i4_header_bits_) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530995 return 0;
996 }
997 // Copy selected samples if not in the right place already.
Vikas Arora8b720222014-01-02 16:48:02 -0800998 if (best_block != best_blocks + VP8Scan[it->i4_]) {
Vikas Arora7c970a02011-06-16 15:56:45 +0530999 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
Vikas Arora8b720222014-01-02 16:48:02 -08001000 }
Vikas Arora7c970a02011-06-16 15:56:45 +05301001 rd->modes_i4[it->i4_] = best_mode;
1002 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
1003 } while (VP8IteratorRotateI4(it, best_blocks));
1004
1005 // finalize state
1006 CopyScore(rd, &rd_best);
1007 VP8SetIntra4Mode(it, rd->modes_i4);
1008 SwapOut(it);
1009 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
1010 return 1; // select intra4x4 over intra16x16
1011}
1012
Vikas Aroraa2415722012-08-09 16:18:58 -07001013//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +05301014
1015static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
Vikas Arora8b720222014-01-02 16:48:02 -08001016 const int kNumBlocks = 8;
Vikas Aroraaf51b942014-08-28 10:51:12 -07001017 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
Vikas Arora7c970a02011-06-16 15:56:45 +05301018 const int lambda = dqm->lambda_uv_;
1019 const uint8_t* const src = it->yuv_in_ + U_OFF;
1020 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
1021 uint8_t* const dst0 = it->yuv_out_ + U_OFF;
1022 VP8ModeScore rd_best;
1023 int mode;
1024
1025 rd->mode_uv = -1;
1026 InitScore(&rd_best);
Vikas Arora1e7bf882013-03-13 16:43:18 -07001027 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
Vikas Arora7c970a02011-06-16 15:56:45 +05301028 VP8ModeScore rd_uv;
1029
1030 // Reconstruct
1031 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
1032
1033 // Compute RD-score
1034 rd_uv.D = VP8SSE16x8(src, tmp_dst);
1035 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
Vikas Arora8b720222014-01-02 16:48:02 -08001036 rd_uv.H = VP8FixedCostsUV[mode];
Vikas Arora7c970a02011-06-16 15:56:45 +05301037 rd_uv.R = VP8GetCostUV(it, &rd_uv);
Vikas Arora8b720222014-01-02 16:48:02 -08001038 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1039 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1040 }
Vikas Arora7c970a02011-06-16 15:56:45 +05301041
1042 SetRDScore(lambda, &rd_uv);
1043 if (mode == 0 || rd_uv.score < rd_best.score) {
1044 CopyScore(&rd_best, &rd_uv);
1045 rd->mode_uv = mode;
1046 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
1047 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
1048 }
1049 }
1050 VP8SetIntraUVMode(it, rd->mode_uv);
1051 AddScore(rd, &rd_best);
1052}
1053
Vikas Aroraa2415722012-08-09 16:18:58 -07001054//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +05301055// Final reconstruction and quantization.
1056
1057static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
1058 const VP8Encoder* const enc = it->enc_;
Vikas Arora1e7bf882013-03-13 16:43:18 -07001059 const int is_i16 = (it->mb_->type_ == 1);
Vikas Arora7c970a02011-06-16 15:56:45 +05301060 int nz = 0;
1061
Vikas Arora1e7bf882013-03-13 16:43:18 -07001062 if (is_i16) {
Vikas Arora7c970a02011-06-16 15:56:45 +05301063 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
1064 } else {
1065 VP8IteratorStartI4(it);
1066 do {
1067 const int mode =
1068 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
1069 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
1070 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
1071 VP8MakeIntra4Preds(it);
1072 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
1073 src, dst, mode) << it->i4_;
1074 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
1075 }
1076
1077 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
1078 rd->nz = nz;
1079}
1080
Vikas Arora1e7bf882013-03-13 16:43:18 -07001081// Refine intra16/intra4 sub-modes based on distortion only (not rate).
1082static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
1083 const int is_i16 = (it->mb_->type_ == 1);
1084 score_t best_score = MAX_COST;
1085
1086 if (try_both_i4_i16 || is_i16) {
1087 int mode;
1088 int best_mode = -1;
1089 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1090 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
1091 const uint8_t* const src = it->yuv_in_ + Y_OFF;
1092 const score_t score = VP8SSE16x16(src, ref);
1093 if (score < best_score) {
1094 best_mode = mode;
1095 best_score = score;
1096 }
1097 }
1098 VP8SetIntra16Mode(it, best_mode);
1099 }
1100 if (try_both_i4_i16 || !is_i16) {
1101 uint8_t modes_i4[16];
1102 // We don't evaluate the rate here, but just account for it through a
1103 // constant penalty (i4 mode usually needs more bits compared to i16).
1104 score_t score_i4 = (score_t)I4_PENALTY;
1105
1106 VP8IteratorStartI4(it);
1107 do {
1108 int mode;
1109 int best_sub_mode = -1;
1110 score_t best_sub_score = MAX_COST;
1111 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
1112
1113 // TODO(skal): we don't really need the prediction pixels here,
1114 // but just the distortion against 'src'.
1115 VP8MakeIntra4Preds(it);
1116 for (mode = 0; mode < NUM_BMODES; ++mode) {
1117 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
1118 const score_t score = VP8SSE4x4(src, ref);
1119 if (score < best_sub_score) {
1120 best_sub_mode = mode;
1121 best_sub_score = score;
1122 }
1123 }
1124 modes_i4[it->i4_] = best_sub_mode;
1125 score_i4 += best_sub_score;
1126 if (score_i4 >= best_score) break;
1127 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
1128 if (score_i4 < best_score) {
1129 VP8SetIntra4Mode(it, modes_i4);
1130 }
1131 }
1132}
1133
Vikas Aroraa2415722012-08-09 16:18:58 -07001134//------------------------------------------------------------------------------
Vikas Arora7c970a02011-06-16 15:56:45 +05301135// Entry point
1136
Vikas Arora1e7bf882013-03-13 16:43:18 -07001137int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
1138 VP8RDLevel rd_opt) {
Vikas Arora7c970a02011-06-16 15:56:45 +05301139 int is_skipped;
Vikas Arora1e7bf882013-03-13 16:43:18 -07001140 const int method = it->enc_->method_;
Vikas Arora7c970a02011-06-16 15:56:45 +05301141
1142 InitScore(rd);
1143
1144 // We can perform predictions for Luma16x16 and Chroma8x8 already.
1145 // Luma4x4 predictions needs to be done as-we-go.
1146 VP8MakeLuma16Preds(it);
1147 VP8MakeChroma8Preds(it);
1148
Vikas Arora1e7bf882013-03-13 16:43:18 -07001149 if (rd_opt > RD_OPT_NONE) {
1150 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
Vikas Arora7c970a02011-06-16 15:56:45 +05301151 PickBestIntra16(it, rd);
Vikas Arora1e7bf882013-03-13 16:43:18 -07001152 if (method >= 2) {
Vikas Arora7c970a02011-06-16 15:56:45 +05301153 PickBestIntra4(it, rd);
1154 }
1155 PickBestUV(it, rd);
Vikas Arora1e7bf882013-03-13 16:43:18 -07001156 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
Vikas Arora7c970a02011-06-16 15:56:45 +05301157 it->do_trellis_ = 1;
1158 SimpleQuantize(it, rd);
1159 }
1160 } else {
Vikas Arora1e7bf882013-03-13 16:43:18 -07001161 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
1162 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
1163 DistoRefine(it, (method >= 2));
Vikas Arora7c970a02011-06-16 15:56:45 +05301164 SimpleQuantize(it, rd);
1165 }
1166 is_skipped = (rd->nz == 0);
1167 VP8SetSkip(it, is_skipped);
1168 return is_skipped;
1169}
1170