Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 1 | // Copyright 2013 Google Inc. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | // Functions to estimate the bit cost of Huffman trees. |
| 16 | |
| 17 | #ifndef BROTLI_ENC_BIT_COST_H_ |
| 18 | #define BROTLI_ENC_BIT_COST_H_ |
| 19 | |
Lode Vandevenne | 6511d6b | 2015-08-28 16:09:23 +0200 | [diff] [blame] | 20 | |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 21 | #include <stdint.h> |
| 22 | |
| 23 | #include "./entropy_encode.h" |
| 24 | #include "./fast_log.h" |
| 25 | |
| 26 | namespace brotli { |
| 27 | |
Lode Vandevenne | 6511d6b | 2015-08-28 16:09:23 +0200 | [diff] [blame] | 28 | static inline double ShannonEntropy(const int *population, int size, |
| 29 | int *total) { |
Zoltan Szabadka | 534654d | 2015-03-27 14:20:35 +0100 | [diff] [blame] | 30 | int sum = 0; |
| 31 | double retval = 0; |
| 32 | const int *population_end = population + size; |
| 33 | int p; |
| 34 | if (size & 1) { |
| 35 | goto odd_number_of_elements_left; |
| 36 | } |
| 37 | while (population < population_end) { |
| 38 | p = *population++; |
| 39 | sum += p; |
| 40 | retval -= p * FastLog2(p); |
| 41 | odd_number_of_elements_left: |
| 42 | p = *population++; |
| 43 | sum += p; |
| 44 | retval -= p * FastLog2(p); |
| 45 | } |
Zoltan Szabadka | 6d80610 | 2015-04-23 15:35:16 +0200 | [diff] [blame] | 46 | if (sum) retval += sum * FastLog2(sum); |
Lode Vandevenne | 6511d6b | 2015-08-28 16:09:23 +0200 | [diff] [blame] | 47 | *total = sum; |
| 48 | return retval; |
| 49 | } |
| 50 | |
| 51 | static inline double BitsEntropy(const int *population, int size) { |
| 52 | int sum; |
| 53 | double retval = ShannonEntropy(population, size, &sum); |
Zoltan Szabadka | 534654d | 2015-03-27 14:20:35 +0100 | [diff] [blame] | 54 | if (retval < sum) { |
| 55 | // At least one bit per literal is needed. |
| 56 | retval = sum; |
| 57 | } |
| 58 | return retval; |
| 59 | } |
| 60 | |
Lode Vandevenne | 6511d6b | 2015-08-28 16:09:23 +0200 | [diff] [blame] | 61 | |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 62 | template<int kSize> |
| 63 | double PopulationCost(const Histogram<kSize>& histogram) { |
| 64 | if (histogram.total_count_ == 0) { |
Zoltan Szabadka | 1447345 | 2013-12-17 17:17:57 +0100 | [diff] [blame] | 65 | return 12; |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 66 | } |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 67 | int count = 0; |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 68 | for (int i = 0; i < kSize; ++i) { |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 69 | if (histogram.data_[i] > 0) { |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 70 | ++count; |
| 71 | } |
| 72 | } |
Zoltan Szabadka | c6b9c7c | 2013-11-15 19:02:17 +0100 | [diff] [blame] | 73 | if (count == 1) { |
Zoltan Szabadka | 1447345 | 2013-12-17 17:17:57 +0100 | [diff] [blame] | 74 | return 12; |
Zoltan Szabadka | c6b9c7c | 2013-11-15 19:02:17 +0100 | [diff] [blame] | 75 | } |
| 76 | if (count == 2) { |
Zoltan Szabadka | 1447345 | 2013-12-17 17:17:57 +0100 | [diff] [blame] | 77 | return 20 + histogram.total_count_; |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 78 | } |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 79 | double bits = 0; |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 80 | uint8_t depth[kSize] = { 0 }; |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 81 | if (count <= 4) { |
| 82 | // For very low symbol count we build the Huffman tree. |
| 83 | CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth); |
| 84 | for (int i = 0; i < kSize; ++i) { |
| 85 | bits += histogram.data_[i] * depth[i]; |
| 86 | } |
| 87 | return count == 3 ? bits + 28 : bits + 37; |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 88 | } |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 89 | |
| 90 | // In this loop we compute the entropy of the histogram and simultaneously |
| 91 | // build a simplified histogram of the code length codes where we use the |
| 92 | // zero repeat code 17, but we don't use the non-zero repeat code 16. |
| 93 | int max_depth = 1; |
| 94 | int depth_histo[kCodeLengthCodes] = { 0 }; |
| 95 | const double log2total = FastLog2(histogram.total_count_); |
| 96 | for (int i = 0; i < kSize;) { |
| 97 | if (histogram.data_[i] > 0) { |
| 98 | // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
| 99 | // = log2(total_count) - log2(count(symbol)) |
| 100 | double log2p = log2total - FastLog2(histogram.data_[i]); |
| 101 | // Approximate the bit depth by round(-log2(P(symbol))) |
| 102 | int depth = static_cast<int>(log2p + 0.5); |
| 103 | bits += histogram.data_[i] * log2p; |
Zoltan Szabadka | 65f3fc5 | 2015-06-12 16:11:50 +0200 | [diff] [blame] | 104 | if (depth > 15) { |
| 105 | depth = 15; |
| 106 | } |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 107 | if (depth > max_depth) { |
| 108 | max_depth = depth; |
| 109 | } |
| 110 | ++depth_histo[depth]; |
| 111 | ++i; |
| 112 | } else { |
Marcin Karpinski | 21ac39f | 2015-09-21 21:04:07 +0200 | [diff] [blame] | 113 | // Compute the run length of zeros and add the appropriate number of 0 and |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 114 | // 17 code length codes to the code length code histogram. |
| 115 | int reps = 1; |
| 116 | for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) { |
| 117 | ++reps; |
| 118 | } |
| 119 | i += reps; |
| 120 | if (i == kSize) { |
| 121 | // Don't add any cost for the last zero run, since these are encoded |
| 122 | // only implicitly. |
| 123 | break; |
| 124 | } |
| 125 | if (reps < 3) { |
| 126 | depth_histo[0] += reps; |
| 127 | } else { |
| 128 | reps -= 2; |
| 129 | while (reps > 0) { |
| 130 | ++depth_histo[17]; |
| 131 | // Add the 3 extra bits for the 17 code length code. |
| 132 | bits += 3; |
| 133 | reps >>= 3; |
| 134 | } |
| 135 | } |
| 136 | } |
Zoltan Szabadka | c6b9c7c | 2013-11-15 19:02:17 +0100 | [diff] [blame] | 137 | } |
Zoltan Szabadka | 667f70a | 2015-06-12 15:29:06 +0200 | [diff] [blame] | 138 | // Add the estimated encoding cost of the code length code histogram. |
| 139 | bits += 18 + 2 * max_depth; |
| 140 | // Add the entropy of the code length code histogram. |
| 141 | bits += BitsEntropy(depth_histo, kCodeLengthCodes); |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 142 | return bits; |
| 143 | } |
| 144 | |
| 145 | } // namespace brotli |
| 146 | |
| 147 | #endif // BROTLI_ENC_BIT_COST_H_ |