blob: dd27fc9f1e10f7f32ff904c0a852b58ee439fa89 [file] [log] [blame]
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to estimate the bit cost of Huffman trees.
16
17#ifndef BROTLI_ENC_BIT_COST_H_
18#define BROTLI_ENC_BIT_COST_H_
19
Lode Vandevenne6511d6b2015-08-28 16:09:23 +020020
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020021#include <stdint.h>
22
23#include "./entropy_encode.h"
24#include "./fast_log.h"
25
26namespace brotli {
27
Lode Vandevenne6511d6b2015-08-28 16:09:23 +020028static inline double ShannonEntropy(const int *population, int size,
29 int *total) {
Zoltan Szabadka534654d2015-03-27 14:20:35 +010030 int sum = 0;
31 double retval = 0;
32 const int *population_end = population + size;
33 int p;
34 if (size & 1) {
35 goto odd_number_of_elements_left;
36 }
37 while (population < population_end) {
38 p = *population++;
39 sum += p;
40 retval -= p * FastLog2(p);
41 odd_number_of_elements_left:
42 p = *population++;
43 sum += p;
44 retval -= p * FastLog2(p);
45 }
Zoltan Szabadka6d806102015-04-23 15:35:16 +020046 if (sum) retval += sum * FastLog2(sum);
Lode Vandevenne6511d6b2015-08-28 16:09:23 +020047 *total = sum;
48 return retval;
49}
50
51static inline double BitsEntropy(const int *population, int size) {
52 int sum;
53 double retval = ShannonEntropy(population, size, &sum);
Zoltan Szabadka534654d2015-03-27 14:20:35 +010054 if (retval < sum) {
55 // At least one bit per literal is needed.
56 retval = sum;
57 }
58 return retval;
59}
60
Lode Vandevenne6511d6b2015-08-28 16:09:23 +020061
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020062template<int kSize>
63double PopulationCost(const Histogram<kSize>& histogram) {
64 if (histogram.total_count_ == 0) {
Zoltan Szabadka14473452013-12-17 17:17:57 +010065 return 12;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020066 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020067 int count = 0;
Zoltan Szabadka667f70a2015-06-12 15:29:06 +020068 for (int i = 0; i < kSize; ++i) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020069 if (histogram.data_[i] > 0) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020070 ++count;
71 }
72 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010073 if (count == 1) {
Zoltan Szabadka14473452013-12-17 17:17:57 +010074 return 12;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010075 }
76 if (count == 2) {
Zoltan Szabadka14473452013-12-17 17:17:57 +010077 return 20 + histogram.total_count_;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020078 }
Zoltan Szabadka667f70a2015-06-12 15:29:06 +020079 double bits = 0;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020080 uint8_t depth[kSize] = { 0 };
Zoltan Szabadka667f70a2015-06-12 15:29:06 +020081 if (count <= 4) {
82 // For very low symbol count we build the Huffman tree.
83 CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
84 for (int i = 0; i < kSize; ++i) {
85 bits += histogram.data_[i] * depth[i];
86 }
87 return count == 3 ? bits + 28 : bits + 37;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020088 }
Zoltan Szabadka667f70a2015-06-12 15:29:06 +020089
90 // In this loop we compute the entropy of the histogram and simultaneously
91 // build a simplified histogram of the code length codes where we use the
92 // zero repeat code 17, but we don't use the non-zero repeat code 16.
93 int max_depth = 1;
94 int depth_histo[kCodeLengthCodes] = { 0 };
95 const double log2total = FastLog2(histogram.total_count_);
96 for (int i = 0; i < kSize;) {
97 if (histogram.data_[i] > 0) {
98 // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
99 // = log2(total_count) - log2(count(symbol))
100 double log2p = log2total - FastLog2(histogram.data_[i]);
101 // Approximate the bit depth by round(-log2(P(symbol)))
102 int depth = static_cast<int>(log2p + 0.5);
103 bits += histogram.data_[i] * log2p;
Zoltan Szabadka65f3fc52015-06-12 16:11:50 +0200104 if (depth > 15) {
105 depth = 15;
106 }
Zoltan Szabadka667f70a2015-06-12 15:29:06 +0200107 if (depth > max_depth) {
108 max_depth = depth;
109 }
110 ++depth_histo[depth];
111 ++i;
112 } else {
Marcin Karpinski21ac39f2015-09-21 21:04:07 +0200113 // Compute the run length of zeros and add the appropriate number of 0 and
Zoltan Szabadka667f70a2015-06-12 15:29:06 +0200114 // 17 code length codes to the code length code histogram.
115 int reps = 1;
116 for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
117 ++reps;
118 }
119 i += reps;
120 if (i == kSize) {
121 // Don't add any cost for the last zero run, since these are encoded
122 // only implicitly.
123 break;
124 }
125 if (reps < 3) {
126 depth_histo[0] += reps;
127 } else {
128 reps -= 2;
129 while (reps > 0) {
130 ++depth_histo[17];
131 // Add the 3 extra bits for the 17 code length code.
132 bits += 3;
133 reps >>= 3;
134 }
135 }
136 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100137 }
Zoltan Szabadka667f70a2015-06-12 15:29:06 +0200138 // Add the estimated encoding cost of the code length code histogram.
139 bits += 18 + 2 * max_depth;
140 // Add the entropy of the code length code histogram.
141 bits += BitsEntropy(depth_histo, kCodeLengthCodes);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200142 return bits;
143}
144
145} // namespace brotli
146
147#endif // BROTLI_ENC_BIT_COST_H_