blob: bb3e3b840a5a1dc0c64e84a301fe9df52f82bfdf [file] [log] [blame]
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
27#include "./entropy_encode.h"
28#include "./fast_log.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010029#include "./hash.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020030#include "./histogram.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010031#include "./literal_cost.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020032#include "./prefix.h"
33#include "./write_bits.h"
34
35namespace brotli {
36
37template<int kSize>
38double Entropy(const std::vector<Histogram<kSize> >& histograms) {
39 double retval = 0;
40 for (int i = 0; i < histograms.size(); ++i) {
41 retval += histograms[i].EntropyBitCost();
42 }
43 return retval;
44}
45
Zoltan Szabadka1571db32013-11-15 19:02:17 +010046template<int kSize>
47double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
48 double retval = 0;
49 for (int i = 0; i < histograms.size(); ++i) {
50 retval += PopulationCost(histograms[i]);
51 }
52 return retval;
53}
54
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020055void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
56 std::vector<uint8_t> len_bytes;
Zoltan Szabadka1571db32013-11-15 19:02:17 +010057 do {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020058 len_bytes.push_back(len & 0xff);
59 len >>= 8;
Zoltan Szabadka1571db32013-11-15 19:02:17 +010060 } while (len > 0);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020061 WriteBits(3, len_bytes.size(), storage_ix, storage);
62 for (int i = 0; i < len_bytes.size(); ++i) {
63 WriteBits(8, len_bytes[i], storage_ix, storage);
64 }
65}
66
Zoltan Szabadka1571db32013-11-15 19:02:17 +010067void EncodeMetaBlockLength(size_t meta_block_size,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020068 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +010069 WriteBits(1, 0, storage_ix, storage);
70 int num_bits = Log2Floor(meta_block_size) + 1;
71 WriteBits(3, (num_bits + 3) >> 2, storage_ix, storage);
72 while (num_bits > 0) {
73 WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
74 meta_block_size >>= 4;
75 num_bits -= 4;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020076 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +010077 if (num_bits > 0) {
78 WriteBits(num_bits, meta_block_size, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020079 }
80}
81
82template<int kSize>
83void EntropyEncode(int val, const EntropyCode<kSize>& code,
84 int* storage_ix, uint8_t* storage) {
85 if (code.count_ <= 1) {
86 return;
87 };
88 WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
89}
90
91void StoreHuffmanTreeOfHuffmanTreeToBitMask(
92 const uint8_t* code_length_bitdepth,
93 int* storage_ix, uint8_t* storage) {
94 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
Zoltan Szabadka1571db32013-11-15 19:02:17 +010095 1, 2, 3, 4, 0, 17, 18, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020096 };
97 // Throw away trailing zeros:
98 int codes_to_store = kCodeLengthCodes;
99 for (; codes_to_store > 4; --codes_to_store) {
100 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
101 break;
102 }
103 }
104 WriteBits(4, codes_to_store - 4, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100105 const int skip_two_first =
106 code_length_bitdepth[kStorageOrder[0]] == 0 &&
107 code_length_bitdepth[kStorageOrder[1]] == 0;
108 WriteBits(1, skip_two_first, storage_ix, storage);
109
110 for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
111 uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
112 uint8_t bits[] = { 0, 7, 3, 1, 2, 15 };
113 int v = code_length_bitdepth[kStorageOrder[i]];
114 WriteBits(len[v], bits[v], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200115 }
116}
117
118void StoreHuffmanTreeToBitMask(
119 const uint8_t* huffman_tree,
120 const uint8_t* huffman_tree_extra_bits,
121 const int huffman_tree_size,
122 const EntropyCode<kCodeLengthCodes>& entropy,
123 int* storage_ix, uint8_t* storage) {
124 for (int i = 0; i < huffman_tree_size; ++i) {
125 const int ix = huffman_tree[i];
126 const int extra_bits = huffman_tree_extra_bits[i];
127 EntropyEncode(ix, entropy, storage_ix, storage);
128 switch (ix) {
129 case 16:
130 WriteBits(2, extra_bits, storage_ix, storage);
131 break;
132 case 17:
133 WriteBits(3, extra_bits, storage_ix, storage);
134 break;
135 case 18:
136 WriteBits(7, extra_bits, storage_ix, storage);
137 break;
138 }
139 }
140}
141
142template<int kSize>
143void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
144 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100145 const uint8_t *depth = &code.depth_[0];
146 int max_bits_counter = alphabet_size - 1;
147 int max_bits = 0;
148 while (max_bits_counter) {
149 max_bits_counter >>= 1;
150 ++max_bits;
151 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200152 if (code.count_ == 0) { // emit minimal tree for empty cases
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100153 // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
154 WriteBits(3 + max_bits, 0x01, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200155 return;
156 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100157 if (code.count_ <= 4) {
158 int symbols[4];
159 // Quadratic sort.
160 int k, j;
161 for (k = 0; k < code.count_; ++k) {
162 symbols[k] = code.symbols_[k];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200163 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100164 for (k = 0; k < code.count_; ++k) {
165 for (j = k + 1; j < code.count_; ++j) {
166 if (depth[symbols[j]] < depth[symbols[k]]) {
167 int t = symbols[k];
168 symbols[k] = symbols[j];
169 symbols[j] = t;
170 }
171 }
172 }
173 // Small tree marker to encode 1-4 symbols.
174 WriteBits(1, 1, storage_ix, storage);
175 WriteBits(2, code.count_ - 1, storage_ix, storage);
176 for (int i = 0; i < code.count_; ++i) {
177 WriteBits(max_bits, symbols[i], storage_ix, storage);
178 }
179 if (code.count_ == 4) {
180 if (depth[symbols[0]] == 2 &&
181 depth[symbols[1]] == 2 &&
182 depth[symbols[2]] == 2 &&
183 depth[symbols[3]] == 2) {
184 WriteBits(1, 0, storage_ix, storage);
185 } else {
186 WriteBits(1, 1, storage_ix, storage);
187 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200188 }
189 return;
190 }
191 WriteBits(1, 0, storage_ix, storage);
192
193 uint8_t huffman_tree[kSize];
194 uint8_t huffman_tree_extra_bits[kSize];
195 int huffman_tree_size = 0;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100196 WriteHuffmanTree(depth,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200197 alphabet_size,
198 &huffman_tree[0],
199 &huffman_tree_extra_bits[0],
200 &huffman_tree_size);
201 Histogram<kCodeLengthCodes> huffman_tree_histogram;
202 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
203 for (int i = 0; i < huffman_tree_size; ++i) {
204 huffman_tree_histogram.Add(huffman_tree[i]);
205 }
206 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100207 BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200208 &huffman_tree_entropy);
209 Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
210 uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
211 while (*last_code == 0 || *last_code >= 17) {
212 trimmed_histogram.Remove(*last_code--);
213 }
214 int trimmed_size = trimmed_histogram.total_count_;
215 bool write_length = false;
216 if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
217 EntropyCode<kCodeLengthCodes> trimmed_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100218 BuildEntropyCode(trimmed_histogram, 5, kCodeLengthCodes, &trimmed_entropy);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200219 int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
220 huffman_tree_entropy);
221 int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
222 trimmed_entropy);;
223 const int nbits = Log2Ceiling(trimmed_size - 1);
224 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
225 if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
226 write_length = true;
227 huffman_tree_size = trimmed_size;
228 huffman_tree_entropy = trimmed_entropy;
229 }
230 }
231
232 StoreHuffmanTreeOfHuffmanTreeToBitMask(
233 &huffman_tree_entropy.depth_[0], storage_ix, storage);
234 WriteBits(1, write_length, storage_ix, storage);
235 if (write_length) {
236 const int nbits = Log2Ceiling(huffman_tree_size - 1);
237 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
238 WriteBits(3, nbitpairs - 1, storage_ix, storage);
239 WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
240 }
241 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
242 huffman_tree_size, huffman_tree_entropy,
243 storage_ix, storage);
244}
245
246template<int kSize>
247void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
248 int alphabet_size,
249 int* storage_ix, uint8_t* storage) {
250 for (int i = 0; i < codes.size(); ++i) {
251 StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
252 }
253}
254
255void EncodeCommand(const Command& cmd,
256 const EntropyCodeCommand& entropy,
257 int* storage_ix, uint8_t* storage) {
258 int code = cmd.command_prefix_;
259 EntropyEncode(code, entropy, storage_ix, storage);
260 if (code >= 128) {
261 code -= 128;
262 }
263 int insert_extra_bits = InsertLengthExtraBits(code);
264 uint64_t insert_extra_bits_val =
265 cmd.insert_length_ - InsertLengthOffset(code);
266 int copy_extra_bits = CopyLengthExtraBits(code);
267 uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
268 if (insert_extra_bits > 0) {
269 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
270 }
271 if (copy_extra_bits > 0) {
272 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
273 }
274}
275
276void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
277 int* storage_ix, uint8_t* storage) {
278 int code = cmd.distance_prefix_;
279 int extra_bits = cmd.distance_extra_bits_;
280 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
281 EntropyEncode(code, entropy, storage_ix, storage);
282 if (extra_bits > 0) {
283 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
284 }
285}
286
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100287void ComputeDistanceShortCodes(std::vector<Command>* cmds,
288 int* dist_ringbuffer,
289 size_t* ringbuffer_idx) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200290 static const int kIndexOffset[16] = {
291 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
292 };
293 static const int kValueOffset[16] = {
294 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
295 };
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200296 for (int i = 0; i < cmds->size(); ++i) {
297 int cur_dist = (*cmds)[i].copy_distance_;
298 if (cur_dist == 0) break;
299 int dist_code = cur_dist + 16;
300 for (int k = 0; k < 16; ++k) {
301 // Only accept more popular choices.
302 if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
303 // Typically unpopular ranges, don't replace a short distance
304 // with them.
305 continue;
306 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100307 int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200308 kValueOffset[k]);
309 if (cur_dist == comp) {
310 dist_code = k + 1;
311 break;
312 }
313 }
314 if (dist_code > 1) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100315 dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
316 ++(*ringbuffer_idx);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200317 }
318 (*cmds)[i].distance_code_ = dist_code;
319 }
320}
321
322void ComputeCommandPrefixes(std::vector<Command>* cmds,
323 int num_direct_distance_codes,
324 int distance_postfix_bits) {
325 for (int i = 0; i < cmds->size(); ++i) {
326 Command* cmd = &(*cmds)[i];
327 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
328 cmd->copy_length_);
329 if (cmd->copy_length_ > 0) {
330 PrefixEncodeCopyDistance(cmd->distance_code_,
331 num_direct_distance_codes,
332 distance_postfix_bits,
333 &cmd->distance_prefix_,
334 &cmd->distance_extra_bits_,
335 &cmd->distance_extra_bits_value_);
336 }
337 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
338 cmd->distance_prefix_ = 0xffff;
339 } else {
340 cmd->command_prefix_ += 128;
341 }
342 }
343}
344
345int IndexOf(const std::vector<int>& v, int value) {
346 for (int i = 0; i < v.size(); ++i) {
347 if (v[i] == value) return i;
348 }
349 return -1;
350}
351
352void MoveToFront(std::vector<int>* v, int index) {
353 int value = (*v)[index];
354 for (int i = index; i > 0; --i) {
355 (*v)[i] = (*v)[i - 1];
356 }
357 (*v)[0] = value;
358}
359
360std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
361 if (v.empty()) return v;
362 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
363 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
364 std::vector<int> result(v.size());
365 for (int i = 0; i < v.size(); ++i) {
366 int index = IndexOf(mtf, v[i]);
367 result[i] = index;
368 MoveToFront(&mtf, index);
369 }
370 return result;
371}
372
373// Finds runs of zeros in v_in and replaces them with a prefix code of the run
374// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
375// shifted by *max_length_prefix. Will not create prefix codes bigger than the
376// initial value of *max_run_length_prefix. The prefix code of run length L is
377// simply Log2Floor(L) and the number of extra bits is the same as the prefix
378// code.
379void RunLengthCodeZeros(const std::vector<int>& v_in,
380 int* max_run_length_prefix,
381 std::vector<int>* v_out,
382 std::vector<int>* extra_bits) {
383 int max_reps = 0;
384 for (int i = 0; i < v_in.size();) {
385 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
386 int reps = 0;
387 for (; i < v_in.size() && v_in[i] == 0; ++i) {
388 ++reps;
389 }
390 max_reps = std::max(reps, max_reps);
391 }
392 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
393 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
394 for (int i = 0; i < v_in.size();) {
395 if (v_in[i] != 0) {
396 v_out->push_back(v_in[i] + *max_run_length_prefix);
397 extra_bits->push_back(0);
398 ++i;
399 } else {
400 int reps = 1;
401 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
402 ++reps;
403 }
404 i += reps;
405 while (reps) {
406 if (reps < (2 << *max_run_length_prefix)) {
407 int run_length_prefix = Log2Floor(reps);
408 v_out->push_back(run_length_prefix);
409 extra_bits->push_back(reps - (1 << run_length_prefix));
410 break;
411 } else {
412 v_out->push_back(*max_run_length_prefix);
413 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
414 reps -= (2 << *max_run_length_prefix) - 1;
415 }
416 }
417 }
418 }
419}
420
421// Returns a maximum zero-run-length-prefix value such that run-length coding
422// zeros in v with this maximum prefix value and then encoding the resulting
423// histogram and entropy-coding v produces the least amount of bits.
424int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
425 int min_cost = std::numeric_limits<int>::max();
426 int best_max_prefix = 0;
427 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
428 std::vector<int> rle_symbols;
429 std::vector<int> extra_bits;
430 int max_run_length_prefix = max_prefix;
431 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
432 if (max_run_length_prefix < max_prefix) break;
433 HistogramLiteral histogram;
434 for (int i = 0; i < rle_symbols.size(); ++i) {
435 histogram.Add(rle_symbols[i]);
436 }
437 int bit_cost = PopulationCost(histogram);
438 if (max_prefix > 0) {
439 bit_cost += 4;
440 }
441 for (int i = 1; i <= max_prefix; ++i) {
442 bit_cost += histogram.data_[i] * i; // extra bits
443 }
444 if (bit_cost < min_cost) {
445 min_cost = bit_cost;
446 best_max_prefix = max_prefix;
447 }
448 }
449 return best_max_prefix;
450}
451
452void EncodeContextMap(const std::vector<int>& context_map,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200453 int num_clusters,
454 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200455 WriteBits(8, num_clusters - 1, storage_ix, storage);
456
457 if (num_clusters == 1 || num_clusters == context_map.size()) {
458 return;
459 }
460
461 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
462 std::vector<int> rle_symbols;
463 std::vector<int> extra_bits;
464 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
465 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
466 &rle_symbols, &extra_bits);
467 HistogramLiteral symbol_histogram;
468 for (int i = 0; i < rle_symbols.size(); ++i) {
469 symbol_histogram.Add(rle_symbols[i]);
470 }
471 EntropyCodeLiteral symbol_code;
472 BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
473 &symbol_code);
474 bool use_rle = max_run_length_prefix > 0;
475 WriteBits(1, use_rle, storage_ix, storage);
476 if (use_rle) {
477 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
478 }
479 StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
480 storage_ix, storage);
481 for (int i = 0; i < rle_symbols.size(); ++i) {
482 EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
483 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
484 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
485 }
486 }
487 WriteBits(1, 1, storage_ix, storage); // use move-to-front
488}
489
490template<int kSize>
491void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
492 int alphabet_size,
493 std::vector<EntropyCode<kSize> >* entropy_codes) {
494 entropy_codes->resize(histograms.size());
495 for (int i = 0; i < histograms.size(); ++i) {
496 BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
497 }
498}
499
500struct BlockSplitCode {
501 EntropyCodeLiteral block_type_code;
502 EntropyCodeBlockLength block_len_code;
503};
504
505void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
506 int length,
507 int* storage_ix, uint8_t* storage) {
508 int len_code = BlockLengthPrefix(length);
509 int extra_bits = BlockLengthExtraBits(len_code);
510 int extra_bits_value = length - BlockLengthOffset(len_code);
511 EntropyEncode(len_code, entropy, storage_ix, storage);
512
513 if (extra_bits > 0) {
514 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
515 }
516}
517
518void ComputeBlockTypeShortCodes(BlockSplit* split) {
519 if (split->num_types_ <= 1) {
520 split->num_types_ = 1;
521 return;
522 }
523 int ringbuffer[2] = { 0, 1 };
524 size_t index = 0;
525 for (int i = 0; i < split->types_.size(); ++i) {
526 int type = split->types_[i];
527 int type_code;
528 if (type == ringbuffer[index & 1]) {
529 type_code = 0;
530 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
531 type_code = 1;
532 } else {
533 type_code = type + 2;
534 }
535 ringbuffer[index & 1] = type;
536 ++index;
537 split->type_codes_.push_back(type_code);
538 }
539}
540
541void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
542 BlockSplitCode* code,
543 int* storage_ix, uint8_t* storage) {
544 if (split.num_types_ <= 1) {
545 WriteBits(1, 0, storage_ix, storage);
546 return;
547 }
548 WriteBits(1, 1, storage_ix, storage);
549 HistogramLiteral type_histo;
550 for (int i = 0; i < split.type_codes_.size(); ++i) {
551 type_histo.Add(split.type_codes_[i]);
552 }
553 BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
554 &code->block_type_code);
555 HistogramBlockLength length_histo;
556 for (int i = 0; i < split.lengths_.size(); ++i) {
557 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
558 }
559 BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
560 &code->block_len_code);
561 WriteBits(8, split.num_types_ - 1, storage_ix, storage);
562 StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
563 storage_ix, storage);
564 StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
565 storage_ix, storage);
566 EncodeBlockLength(code->block_len_code, split.lengths_[0],
567 storage_ix, storage);
568}
569
570void MoveAndEncode(const BlockSplitCode& code,
571 BlockSplitIterator* it,
572 int* storage_ix, uint8_t* storage) {
573 if (it->length_ == 0) {
574 ++it->idx_;
575 it->type_ = it->split_.types_[it->idx_];
576 it->length_ = it->split_.lengths_[it->idx_];
577 uint8_t type_code = it->split_.type_codes_[it->idx_];
578 EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
579 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
580 }
581 --it->length_;
582}
583
584struct EncodingParams {
585 int num_direct_distance_codes;
586 int distance_postfix_bits;
587 int literal_context_mode;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200588};
589
590struct MetaBlock {
591 std::vector<Command> cmds;
592 EncodingParams params;
593 BlockSplit literal_split;
594 BlockSplit command_split;
595 BlockSplit distance_split;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100596 std::vector<int> literal_context_modes;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200597 std::vector<int> literal_context_map;
598 std::vector<int> distance_context_map;
599 std::vector<HistogramLiteral> literal_histograms;
600 std::vector<HistogramCommand> command_histograms;
601 std::vector<HistogramDistance> distance_histograms;
602};
603
604void BuildMetaBlock(const EncodingParams& params,
605 const std::vector<Command>& cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100606 const uint8_t* ringbuffer,
607 const size_t pos,
608 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200609 MetaBlock* mb) {
610 mb->cmds = cmds;
611 mb->params = params;
612 ComputeCommandPrefixes(&mb->cmds,
613 mb->params.num_direct_distance_codes,
614 mb->params.distance_postfix_bits);
615 SplitBlock(mb->cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100616 &ringbuffer[pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200617 &mb->literal_split,
618 &mb->command_split,
619 &mb->distance_split);
620 ComputeBlockTypeShortCodes(&mb->literal_split);
621 ComputeBlockTypeShortCodes(&mb->command_split);
622 ComputeBlockTypeShortCodes(&mb->distance_split);
623
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100624 mb->literal_context_modes.resize(mb->literal_split.num_types_,
625 mb->params.literal_context_mode);
626
627
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200628 int num_literal_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100629 mb->literal_split.num_types_ << kLiteralContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200630 int num_distance_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100631 mb->distance_split.num_types_ << kDistanceContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200632 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
633 mb->command_histograms.resize(mb->command_split.num_types_);
634 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
635 BuildHistograms(mb->cmds,
636 mb->literal_split,
637 mb->command_split,
638 mb->distance_split,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100639 ringbuffer,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200640 pos,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100641 mask,
642 mb->literal_context_modes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200643 &literal_histograms,
644 &mb->command_histograms,
645 &distance_histograms);
646
647 // Histogram ids need to fit in one byte and there are 16 ids reserved for
648 // run length codes, which leaves a maximum number of 240 histograms.
649 static const int kMaxNumberOfHistograms = 240;
650
651 mb->literal_histograms = literal_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100652 ClusterHistograms(literal_histograms,
653 1 << kLiteralContextBits,
654 mb->literal_split.num_types_,
655 kMaxNumberOfHistograms,
656 &mb->literal_histograms,
657 &mb->literal_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200658
659 mb->distance_histograms = distance_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100660 ClusterHistograms(distance_histograms,
661 1 << kDistanceContextBits,
662 mb->distance_split.num_types_,
663 kMaxNumberOfHistograms,
664 &mb->distance_histograms,
665 &mb->distance_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200666}
667
668size_t MetaBlockLength(const std::vector<Command>& cmds) {
669 size_t length = 0;
670 for (int i = 0; i < cmds.size(); ++i) {
671 const Command& cmd = cmds[i];
672 length += cmd.insert_length_ + cmd.copy_length_;
673 }
674 return length;
675}
676
677void StoreMetaBlock(const MetaBlock& mb,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100678 const uint8_t* ringbuffer,
679 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200680 size_t* pos,
681 int* storage_ix, uint8_t* storage) {
682 size_t length = MetaBlockLength(mb.cmds);
683 const size_t end_pos = *pos + length;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100684 EncodeMetaBlockLength(length - 1,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200685 storage_ix, storage);
686 BlockSplitCode literal_split_code;
687 BlockSplitCode command_split_code;
688 BlockSplitCode distance_split_code;
689 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
690 storage_ix, storage);
691 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
692 storage_ix, storage);
693 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
694 storage_ix, storage);
695 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
696 WriteBits(4,
697 mb.params.num_direct_distance_codes >>
698 mb.params.distance_postfix_bits, storage_ix, storage);
699 int num_distance_codes =
700 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
701 (48 << mb.params.distance_postfix_bits);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100702 for (int i = 0; i < mb.literal_split.num_types_; ++i) {
703 WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
704 }
705 EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
706 EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200707 std::vector<EntropyCodeLiteral> literal_codes;
708 std::vector<EntropyCodeCommand> command_codes;
709 std::vector<EntropyCodeDistance> distance_codes;
710 BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
711 BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
712 &command_codes);
713 BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
714 &distance_codes);
715 StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
716 StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
717 StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
718 BlockSplitIterator literal_it(mb.literal_split);
719 BlockSplitIterator command_it(mb.command_split);
720 BlockSplitIterator distance_it(mb.distance_split);
721 for (int i = 0; i < mb.cmds.size(); ++i) {
722 const Command& cmd = mb.cmds[i];
723 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
724 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
725 for (int j = 0; j < cmd.insert_length_; ++j) {
726 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
727 int histogram_idx = literal_it.type_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100728 uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
729 uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
730 int context = ((literal_it.type_ << kLiteralContextBits) +
731 Context(prev_byte, prev_byte2,
732 mb.literal_context_modes[literal_it.type_]));
733 histogram_idx = mb.literal_context_map[context];
734 EntropyEncode(ringbuffer[*pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200735 literal_codes[histogram_idx], storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100736 ++(*pos);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200737 }
738 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
739 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
740 int histogram_index = distance_it.type_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100741 int context = (distance_it.type_ << 2) +
742 ((cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2);
743 histogram_index = mb.distance_context_map[context];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200744 EncodeCopyDistance(cmd, distance_codes[histogram_index],
745 storage_ix, storage);
746 }
747 *pos += cmd.copy_length_;
748 }
749}
750
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100751static const int kWindowBits = 22;
752// To make decoding faster, we allow the decoder to write 16 bytes ahead in
753// its ringbuffer, therefore the encoder has to decrease max distance by this
754// amount.
755static const int kDecoderRingBufferWriteAheadSlack = 16;
756static const int kMaxBackwardDistance =
757 (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
758
759static const int kMetaBlockSizeBits = 21;
760static const int kRingBufferBits = 23;
761static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
762
763BrotliCompressor::BrotliCompressor()
764 : hasher_(new Hasher),
765 dist_ringbuffer_idx_(0),
766 input_pos_(0),
767 ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
768 literal_cost_(1 << kRingBufferBits),
769 storage_ix_(0),
770 storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
771 dist_ringbuffer_[0] = 4;
772 dist_ringbuffer_[1] = 11;
773 dist_ringbuffer_[2] = 15;
774 dist_ringbuffer_[3] = 16;
775 storage_[0] = 0;
776 }
777
778BrotliCompressor::~BrotliCompressor() {
779 delete hasher_;
780 delete[] storage_;
781}
782
783void BrotliCompressor::WriteStreamHeader() {
784 // Don't encode input size.
785 WriteBits(3, 0, &storage_ix_, storage_);
786 // Encode window size.
787 WriteBits(1, 1, &storage_ix_, storage_);
788 WriteBits(3, kWindowBits - 17, &storage_ix_, storage_);
789}
790
791void BrotliCompressor::WriteMetaBlock(const size_t input_size,
792 const uint8_t* input_buffer,
793 size_t* encoded_size,
794 uint8_t* encoded_buffer) {
795 ringbuffer_.Write(input_buffer, input_size);
796 EstimateBitCostsForLiterals(input_pos_, input_size,
797 kRingBufferMask, ringbuffer_.start(),
798 &literal_cost_[0]);
799 std::vector<Command> commands;
800 CreateBackwardReferences(input_size, input_pos_,
801 ringbuffer_.start(),
802 &literal_cost_[0],
803 kRingBufferMask, kMaxBackwardDistance,
804 hasher_,
805 &commands);
806 ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
807 &dist_ringbuffer_idx_);
808 EncodingParams params;
809 params.num_direct_distance_codes = 12;
810 params.distance_postfix_bits = 1;
811 params.literal_context_mode = CONTEXT_SIGNED;
812 MetaBlock mb;
813 BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
814 kRingBufferMask, &mb);
815 StoreMetaBlock(mb, ringbuffer_.start(), kRingBufferMask,
816 &input_pos_, &storage_ix_, storage_);
817 size_t output_size = storage_ix_ >> 3;
818 memcpy(encoded_buffer, storage_, output_size);
819 *encoded_size = output_size;
820 storage_ix_ -= output_size << 3;
821 storage_[storage_ix_ >> 3] = storage_[output_size];
822}
823
824void BrotliCompressor::FinishStream(
825 size_t* encoded_size, uint8_t* encoded_buffer) {
826 WriteBits(1, 1, &storage_ix_, storage_);
827 *encoded_size = (storage_ix_ + 7) >> 3;
828 memcpy(encoded_buffer, storage_, *encoded_size);
829}
830
831
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200832int BrotliCompressBuffer(size_t input_size,
833 const uint8_t* input_buffer,
834 size_t* encoded_size,
835 uint8_t* encoded_buffer) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200836 if (input_size == 0) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100837 encoded_buffer[0] = 1;
838 encoded_buffer[1] = 0;
839 *encoded_size = 2;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200840 return 1;
841 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200842
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100843 BrotliCompressor compressor;
844 compressor.WriteStreamHeader();
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200845
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100846 const int max_block_size = 1 << kMetaBlockSizeBits;
847 size_t max_output_size = *encoded_size;
848 const uint8_t* input_end = input_buffer + input_size;
849 *encoded_size = 0;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200850
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100851 while (input_buffer < input_end) {
852 int block_size = max_block_size;
853 if (block_size >= input_end - input_buffer) {
854 block_size = input_end - input_buffer;
855 }
856 size_t output_size = max_output_size;
857 compressor.WriteMetaBlock(block_size, input_buffer,
858 &output_size, &encoded_buffer[*encoded_size]);
859 input_buffer += block_size;
860 *encoded_size += output_size;
861 max_output_size -= output_size;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200862 }
863
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100864 size_t output_size = max_output_size;
865 compressor.FinishStream(&output_size, &encoded_buffer[*encoded_size]);
866 *encoded_size += output_size;
867
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200868 return 1;
869}
870
871} // namespace brotli