blob: cefc7dc4f14dadaec78b763a6fc90eb0c2d7c3bd [file] [log] [blame]
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
27#include "./entropy_encode.h"
28#include "./fast_log.h"
29#include "./histogram.h"
30#include "./prefix.h"
31#include "./write_bits.h"
32
33namespace brotli {
34
35template<int kSize>
36double Entropy(const std::vector<Histogram<kSize> >& histograms) {
37 double retval = 0;
38 for (int i = 0; i < histograms.size(); ++i) {
39 retval += histograms[i].EntropyBitCost();
40 }
41 return retval;
42}
43
44void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
45 std::vector<uint8_t> len_bytes;
46 while (len > 0) {
47 len_bytes.push_back(len & 0xff);
48 len >>= 8;
49 };
50 WriteBits(3, len_bytes.size(), storage_ix, storage);
51 for (int i = 0; i < len_bytes.size(); ++i) {
52 WriteBits(8, len_bytes[i], storage_ix, storage);
53 }
54}
55
56void EncodeMetaBlockLength(int input_size_bits,
57 size_t meta_block_size,
58 bool is_last_meta_block,
59 int* storage_ix, uint8_t* storage) {
60 WriteBits(1, is_last_meta_block, storage_ix, storage);
61 if (is_last_meta_block) return;
62 while (input_size_bits > 0) {
63 WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
64 meta_block_size >>= 8;
65 input_size_bits -= 8;
66 }
67 if (input_size_bits > 0) {
68 WriteBits(input_size_bits, meta_block_size, storage_ix, storage);
69 }
70}
71
72template<int kSize>
73void EntropyEncode(int val, const EntropyCode<kSize>& code,
74 int* storage_ix, uint8_t* storage) {
75 if (code.count_ <= 1) {
76 return;
77 };
78 WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
79}
80
81void StoreHuffmanTreeOfHuffmanTreeToBitMask(
82 const uint8_t* code_length_bitdepth,
83 int* storage_ix, uint8_t* storage) {
84 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
85 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
86 };
87 // Throw away trailing zeros:
88 int codes_to_store = kCodeLengthCodes;
89 for (; codes_to_store > 4; --codes_to_store) {
90 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
91 break;
92 }
93 }
94 WriteBits(4, codes_to_store - 4, storage_ix, storage);
95 for (int i = 0; i < codes_to_store; ++i) {
96 WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
97 }
98}
99
100void StoreHuffmanTreeToBitMask(
101 const uint8_t* huffman_tree,
102 const uint8_t* huffman_tree_extra_bits,
103 const int huffman_tree_size,
104 const EntropyCode<kCodeLengthCodes>& entropy,
105 int* storage_ix, uint8_t* storage) {
106 for (int i = 0; i < huffman_tree_size; ++i) {
107 const int ix = huffman_tree[i];
108 const int extra_bits = huffman_tree_extra_bits[i];
109 EntropyEncode(ix, entropy, storage_ix, storage);
110 switch (ix) {
111 case 16:
112 WriteBits(2, extra_bits, storage_ix, storage);
113 break;
114 case 17:
115 WriteBits(3, extra_bits, storage_ix, storage);
116 break;
117 case 18:
118 WriteBits(7, extra_bits, storage_ix, storage);
119 break;
120 }
121 }
122}
123
124template<int kSize>
125void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
126 int* storage_ix, uint8_t* storage) {
127 const int kMaxBits = 8;
128 const int kMaxSymbol = 1 << kMaxBits;
129
130 if (code.count_ == 0) { // emit minimal tree for empty cases
131 // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
132 WriteBits(4, 0x01, storage_ix, storage);
133 return;
134 }
135 if (code.count_ <= 2 &&
136 code.symbols_[0] < kMaxSymbol &&
137 code.symbols_[1] < kMaxSymbol) {
138 // Small tree marker to encode 1 or 2 symbols.
139 WriteBits(1, 1, storage_ix, storage);
140 WriteBits(1, code.count_ - 1, storage_ix, storage);
141 if (code.symbols_[0] <= 1) {
142 // Code bit for small (1 bit) symbol value.
143 WriteBits(1, 0, storage_ix, storage);
144 WriteBits(1, code.symbols_[0], storage_ix, storage);
145 } else {
146 WriteBits(1, 1, storage_ix, storage);
147 WriteBits(8, code.symbols_[0], storage_ix, storage);
148 }
149 if (code.count_ == 2) {
150 WriteBits(8, code.symbols_[1], storage_ix, storage);
151 }
152 return;
153 }
154 WriteBits(1, 0, storage_ix, storage);
155
156 uint8_t huffman_tree[kSize];
157 uint8_t huffman_tree_extra_bits[kSize];
158 int huffman_tree_size = 0;
159 WriteHuffmanTree(&code.depth_[0],
160 alphabet_size,
161 &huffman_tree[0],
162 &huffman_tree_extra_bits[0],
163 &huffman_tree_size);
164 Histogram<kCodeLengthCodes> huffman_tree_histogram;
165 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
166 for (int i = 0; i < huffman_tree_size; ++i) {
167 huffman_tree_histogram.Add(huffman_tree[i]);
168 }
169 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
170 BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
171 &huffman_tree_entropy);
172 Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
173 uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
174 while (*last_code == 0 || *last_code >= 17) {
175 trimmed_histogram.Remove(*last_code--);
176 }
177 int trimmed_size = trimmed_histogram.total_count_;
178 bool write_length = false;
179 if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
180 EntropyCode<kCodeLengthCodes> trimmed_entropy;
181 BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
182 int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
183 huffman_tree_entropy);
184 int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
185 trimmed_entropy);;
186 const int nbits = Log2Ceiling(trimmed_size - 1);
187 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
188 if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
189 write_length = true;
190 huffman_tree_size = trimmed_size;
191 huffman_tree_entropy = trimmed_entropy;
192 }
193 }
194
195 StoreHuffmanTreeOfHuffmanTreeToBitMask(
196 &huffman_tree_entropy.depth_[0], storage_ix, storage);
197 WriteBits(1, write_length, storage_ix, storage);
198 if (write_length) {
199 const int nbits = Log2Ceiling(huffman_tree_size - 1);
200 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
201 WriteBits(3, nbitpairs - 1, storage_ix, storage);
202 WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
203 }
204 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
205 huffman_tree_size, huffman_tree_entropy,
206 storage_ix, storage);
207}
208
209template<int kSize>
210void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
211 int alphabet_size,
212 int* storage_ix, uint8_t* storage) {
213 for (int i = 0; i < codes.size(); ++i) {
214 StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
215 }
216}
217
218void EncodeCommand(const Command& cmd,
219 const EntropyCodeCommand& entropy,
220 int* storage_ix, uint8_t* storage) {
221 int code = cmd.command_prefix_;
222 EntropyEncode(code, entropy, storage_ix, storage);
223 if (code >= 128) {
224 code -= 128;
225 }
226 int insert_extra_bits = InsertLengthExtraBits(code);
227 uint64_t insert_extra_bits_val =
228 cmd.insert_length_ - InsertLengthOffset(code);
229 int copy_extra_bits = CopyLengthExtraBits(code);
230 uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
231 if (insert_extra_bits > 0) {
232 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
233 }
234 if (copy_extra_bits > 0) {
235 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
236 }
237}
238
239void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
240 int* storage_ix, uint8_t* storage) {
241 int code = cmd.distance_prefix_;
242 int extra_bits = cmd.distance_extra_bits_;
243 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
244 EntropyEncode(code, entropy, storage_ix, storage);
245 if (extra_bits > 0) {
246 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
247 }
248}
249
250
251void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
252 static const int kIndexOffset[16] = {
253 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
254 };
255 static const int kValueOffset[16] = {
256 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
257 };
258 int dist_ringbuffer[4] = { 4, 11, 15, 16 };
259 int ringbuffer_idx = 0;
260 for (int i = 0; i < cmds->size(); ++i) {
261 int cur_dist = (*cmds)[i].copy_distance_;
262 if (cur_dist == 0) break;
263 int dist_code = cur_dist + 16;
264 for (int k = 0; k < 16; ++k) {
265 // Only accept more popular choices.
266 if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
267 // Typically unpopular ranges, don't replace a short distance
268 // with them.
269 continue;
270 }
271 int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
272 kValueOffset[k]);
273 if (cur_dist == comp) {
274 dist_code = k + 1;
275 break;
276 }
277 }
278 if (dist_code > 1) {
279 dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
280 ++ringbuffer_idx;
281 }
282 (*cmds)[i].distance_code_ = dist_code;
283 }
284}
285
286void ComputeCommandPrefixes(std::vector<Command>* cmds,
287 int num_direct_distance_codes,
288 int distance_postfix_bits) {
289 for (int i = 0; i < cmds->size(); ++i) {
290 Command* cmd = &(*cmds)[i];
291 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
292 cmd->copy_length_);
293 if (cmd->copy_length_ > 0) {
294 PrefixEncodeCopyDistance(cmd->distance_code_,
295 num_direct_distance_codes,
296 distance_postfix_bits,
297 &cmd->distance_prefix_,
298 &cmd->distance_extra_bits_,
299 &cmd->distance_extra_bits_value_);
300 }
301 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
302 cmd->distance_prefix_ = 0xffff;
303 } else {
304 cmd->command_prefix_ += 128;
305 }
306 }
307}
308
309int IndexOf(const std::vector<int>& v, int value) {
310 for (int i = 0; i < v.size(); ++i) {
311 if (v[i] == value) return i;
312 }
313 return -1;
314}
315
316void MoveToFront(std::vector<int>* v, int index) {
317 int value = (*v)[index];
318 for (int i = index; i > 0; --i) {
319 (*v)[i] = (*v)[i - 1];
320 }
321 (*v)[0] = value;
322}
323
324std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
325 if (v.empty()) return v;
326 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
327 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
328 std::vector<int> result(v.size());
329 for (int i = 0; i < v.size(); ++i) {
330 int index = IndexOf(mtf, v[i]);
331 result[i] = index;
332 MoveToFront(&mtf, index);
333 }
334 return result;
335}
336
337// Finds runs of zeros in v_in and replaces them with a prefix code of the run
338// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
339// shifted by *max_length_prefix. Will not create prefix codes bigger than the
340// initial value of *max_run_length_prefix. The prefix code of run length L is
341// simply Log2Floor(L) and the number of extra bits is the same as the prefix
342// code.
343void RunLengthCodeZeros(const std::vector<int>& v_in,
344 int* max_run_length_prefix,
345 std::vector<int>* v_out,
346 std::vector<int>* extra_bits) {
347 int max_reps = 0;
348 for (int i = 0; i < v_in.size();) {
349 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
350 int reps = 0;
351 for (; i < v_in.size() && v_in[i] == 0; ++i) {
352 ++reps;
353 }
354 max_reps = std::max(reps, max_reps);
355 }
356 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
357 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
358 for (int i = 0; i < v_in.size();) {
359 if (v_in[i] != 0) {
360 v_out->push_back(v_in[i] + *max_run_length_prefix);
361 extra_bits->push_back(0);
362 ++i;
363 } else {
364 int reps = 1;
365 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
366 ++reps;
367 }
368 i += reps;
369 while (reps) {
370 if (reps < (2 << *max_run_length_prefix)) {
371 int run_length_prefix = Log2Floor(reps);
372 v_out->push_back(run_length_prefix);
373 extra_bits->push_back(reps - (1 << run_length_prefix));
374 break;
375 } else {
376 v_out->push_back(*max_run_length_prefix);
377 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
378 reps -= (2 << *max_run_length_prefix) - 1;
379 }
380 }
381 }
382 }
383}
384
385// Returns a maximum zero-run-length-prefix value such that run-length coding
386// zeros in v with this maximum prefix value and then encoding the resulting
387// histogram and entropy-coding v produces the least amount of bits.
388int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
389 int min_cost = std::numeric_limits<int>::max();
390 int best_max_prefix = 0;
391 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
392 std::vector<int> rle_symbols;
393 std::vector<int> extra_bits;
394 int max_run_length_prefix = max_prefix;
395 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
396 if (max_run_length_prefix < max_prefix) break;
397 HistogramLiteral histogram;
398 for (int i = 0; i < rle_symbols.size(); ++i) {
399 histogram.Add(rle_symbols[i]);
400 }
401 int bit_cost = PopulationCost(histogram);
402 if (max_prefix > 0) {
403 bit_cost += 4;
404 }
405 for (int i = 1; i <= max_prefix; ++i) {
406 bit_cost += histogram.data_[i] * i; // extra bits
407 }
408 if (bit_cost < min_cost) {
409 min_cost = bit_cost;
410 best_max_prefix = max_prefix;
411 }
412 }
413 return best_max_prefix;
414}
415
416void EncodeContextMap(const std::vector<int>& context_map,
417 int context_mode,
418 int context_mode_bits,
419 int num_clusters,
420 int* storage_ix, uint8_t* storage) {
421 if (context_mode == 0) {
422 WriteBits(1, 0, storage_ix, storage); // no context
423 return;
424 }
425
426 WriteBits(1, 1, storage_ix, storage); // have context
427 if (context_mode_bits > 0) {
428 WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
429 }
430 WriteBits(8, num_clusters - 1, storage_ix, storage);
431
432 if (num_clusters == 1 || num_clusters == context_map.size()) {
433 return;
434 }
435
436 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
437 std::vector<int> rle_symbols;
438 std::vector<int> extra_bits;
439 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
440 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
441 &rle_symbols, &extra_bits);
442 HistogramLiteral symbol_histogram;
443 for (int i = 0; i < rle_symbols.size(); ++i) {
444 symbol_histogram.Add(rle_symbols[i]);
445 }
446 EntropyCodeLiteral symbol_code;
447 BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
448 &symbol_code);
449 bool use_rle = max_run_length_prefix > 0;
450 WriteBits(1, use_rle, storage_ix, storage);
451 if (use_rle) {
452 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
453 }
454 StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
455 storage_ix, storage);
456 for (int i = 0; i < rle_symbols.size(); ++i) {
457 EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
458 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
459 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
460 }
461 }
462 WriteBits(1, 1, storage_ix, storage); // use move-to-front
463}
464
465template<int kSize>
466void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
467 int alphabet_size,
468 std::vector<EntropyCode<kSize> >* entropy_codes) {
469 entropy_codes->resize(histograms.size());
470 for (int i = 0; i < histograms.size(); ++i) {
471 BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
472 }
473}
474
475struct BlockSplitCode {
476 EntropyCodeLiteral block_type_code;
477 EntropyCodeBlockLength block_len_code;
478};
479
480void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
481 int length,
482 int* storage_ix, uint8_t* storage) {
483 int len_code = BlockLengthPrefix(length);
484 int extra_bits = BlockLengthExtraBits(len_code);
485 int extra_bits_value = length - BlockLengthOffset(len_code);
486 EntropyEncode(len_code, entropy, storage_ix, storage);
487
488 if (extra_bits > 0) {
489 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
490 }
491}
492
493void ComputeBlockTypeShortCodes(BlockSplit* split) {
494 if (split->num_types_ <= 1) {
495 split->num_types_ = 1;
496 return;
497 }
498 int ringbuffer[2] = { 0, 1 };
499 size_t index = 0;
500 for (int i = 0; i < split->types_.size(); ++i) {
501 int type = split->types_[i];
502 int type_code;
503 if (type == ringbuffer[index & 1]) {
504 type_code = 0;
505 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
506 type_code = 1;
507 } else {
508 type_code = type + 2;
509 }
510 ringbuffer[index & 1] = type;
511 ++index;
512 split->type_codes_.push_back(type_code);
513 }
514}
515
516void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
517 BlockSplitCode* code,
518 int* storage_ix, uint8_t* storage) {
519 if (split.num_types_ <= 1) {
520 WriteBits(1, 0, storage_ix, storage);
521 return;
522 }
523 WriteBits(1, 1, storage_ix, storage);
524 HistogramLiteral type_histo;
525 for (int i = 0; i < split.type_codes_.size(); ++i) {
526 type_histo.Add(split.type_codes_[i]);
527 }
528 BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
529 &code->block_type_code);
530 HistogramBlockLength length_histo;
531 for (int i = 0; i < split.lengths_.size(); ++i) {
532 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
533 }
534 BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
535 &code->block_len_code);
536 WriteBits(8, split.num_types_ - 1, storage_ix, storage);
537 StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
538 storage_ix, storage);
539 StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
540 storage_ix, storage);
541 EncodeBlockLength(code->block_len_code, split.lengths_[0],
542 storage_ix, storage);
543}
544
545void MoveAndEncode(const BlockSplitCode& code,
546 BlockSplitIterator* it,
547 int* storage_ix, uint8_t* storage) {
548 if (it->length_ == 0) {
549 ++it->idx_;
550 it->type_ = it->split_.types_[it->idx_];
551 it->length_ = it->split_.lengths_[it->idx_];
552 uint8_t type_code = it->split_.type_codes_[it->idx_];
553 EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
554 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
555 }
556 --it->length_;
557}
558
559struct EncodingParams {
560 int num_direct_distance_codes;
561 int distance_postfix_bits;
562 int literal_context_mode;
563 int distance_context_mode;
564};
565
566struct MetaBlock {
567 std::vector<Command> cmds;
568 EncodingParams params;
569 BlockSplit literal_split;
570 BlockSplit command_split;
571 BlockSplit distance_split;
572 std::vector<int> literal_context_map;
573 std::vector<int> distance_context_map;
574 std::vector<HistogramLiteral> literal_histograms;
575 std::vector<HistogramCommand> command_histograms;
576 std::vector<HistogramDistance> distance_histograms;
577};
578
579void BuildMetaBlock(const EncodingParams& params,
580 const std::vector<Command>& cmds,
581 const uint8_t* input_buffer,
582 size_t pos,
583 MetaBlock* mb) {
584 mb->cmds = cmds;
585 mb->params = params;
586 ComputeCommandPrefixes(&mb->cmds,
587 mb->params.num_direct_distance_codes,
588 mb->params.distance_postfix_bits);
589 SplitBlock(mb->cmds,
590 input_buffer + pos,
591 &mb->literal_split,
592 &mb->command_split,
593 &mb->distance_split);
594 ComputeBlockTypeShortCodes(&mb->literal_split);
595 ComputeBlockTypeShortCodes(&mb->command_split);
596 ComputeBlockTypeShortCodes(&mb->distance_split);
597
598 int num_literal_contexts_per_block_type =
599 NumContexts(mb->params.literal_context_mode);
600 int num_literal_contexts =
601 mb->literal_split.num_types_ *
602 num_literal_contexts_per_block_type;
603 int num_distance_contexts_per_block_type =
604 (mb->params.distance_context_mode > 0 ? 4 : 1);
605 int num_distance_contexts =
606 mb->distance_split.num_types_ *
607 num_distance_contexts_per_block_type;
608 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
609 mb->command_histograms.resize(mb->command_split.num_types_);
610 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
611 BuildHistograms(mb->cmds,
612 mb->literal_split,
613 mb->command_split,
614 mb->distance_split,
615 input_buffer,
616 pos,
617 mb->params.literal_context_mode,
618 mb->params.distance_context_mode,
619 &literal_histograms,
620 &mb->command_histograms,
621 &distance_histograms);
622
623 // Histogram ids need to fit in one byte and there are 16 ids reserved for
624 // run length codes, which leaves a maximum number of 240 histograms.
625 static const int kMaxNumberOfHistograms = 240;
626
627 mb->literal_histograms = literal_histograms;
628 if (mb->params.literal_context_mode > 0) {
629 ClusterHistograms(literal_histograms,
630 num_literal_contexts_per_block_type,
631 mb->literal_split.num_types_,
632 kMaxNumberOfHistograms,
633 &mb->literal_histograms,
634 &mb->literal_context_map);
635 }
636
637 mb->distance_histograms = distance_histograms;
638 if (mb->params.distance_context_mode > 0) {
639 ClusterHistograms(distance_histograms,
640 num_distance_contexts_per_block_type,
641 mb->distance_split.num_types_,
642 kMaxNumberOfHistograms,
643 &mb->distance_histograms,
644 &mb->distance_context_map);
645 }
646}
647
648size_t MetaBlockLength(const std::vector<Command>& cmds) {
649 size_t length = 0;
650 for (int i = 0; i < cmds.size(); ++i) {
651 const Command& cmd = cmds[i];
652 length += cmd.insert_length_ + cmd.copy_length_;
653 }
654 return length;
655}
656
657void StoreMetaBlock(const MetaBlock& mb,
658 const uint8_t* input_buffer,
659 int input_size_bits,
660 bool is_last,
661 size_t* pos,
662 int* storage_ix, uint8_t* storage) {
663 size_t length = MetaBlockLength(mb.cmds);
664 const size_t end_pos = *pos + length;
665 EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
666 storage_ix, storage);
667 BlockSplitCode literal_split_code;
668 BlockSplitCode command_split_code;
669 BlockSplitCode distance_split_code;
670 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
671 storage_ix, storage);
672 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
673 storage_ix, storage);
674 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
675 storage_ix, storage);
676 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
677 WriteBits(4,
678 mb.params.num_direct_distance_codes >>
679 mb.params.distance_postfix_bits, storage_ix, storage);
680 int num_distance_codes =
681 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
682 (48 << mb.params.distance_postfix_bits);
683 EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
684 mb.literal_histograms.size(), storage_ix, storage);
685 EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
686 mb.distance_histograms.size(), storage_ix, storage);
687 std::vector<EntropyCodeLiteral> literal_codes;
688 std::vector<EntropyCodeCommand> command_codes;
689 std::vector<EntropyCodeDistance> distance_codes;
690 BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
691 BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
692 &command_codes);
693 BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
694 &distance_codes);
695 StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
696 StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
697 StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
698 BlockSplitIterator literal_it(mb.literal_split);
699 BlockSplitIterator command_it(mb.command_split);
700 BlockSplitIterator distance_it(mb.distance_split);
701 for (int i = 0; i < mb.cmds.size(); ++i) {
702 const Command& cmd = mb.cmds[i];
703 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
704 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
705 for (int j = 0; j < cmd.insert_length_; ++j) {
706 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
707 int histogram_idx = literal_it.type_;
708 if (mb.params.literal_context_mode > 0) {
709 uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
710 uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
711 uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
712 int context = (literal_it.type_ *
713 NumContexts(mb.params.literal_context_mode) +
714 Context(prev_byte, prev_byte2, prev_byte3,
715 mb.params.literal_context_mode));
716 histogram_idx = mb.literal_context_map[context];
717 }
718 EntropyEncode(input_buffer[(*pos)++],
719 literal_codes[histogram_idx], storage_ix, storage);
720 }
721 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
722 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
723 int histogram_index = distance_it.type_;
724 if (mb.params.distance_context_mode > 0) {
725 int context = distance_it.type_ << 2;
726 context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
727 histogram_index = mb.distance_context_map[context];
728 }
729 EncodeCopyDistance(cmd, distance_codes[histogram_index],
730 storage_ix, storage);
731 }
732 *pos += cmd.copy_length_;
733 }
734}
735
736int BrotliCompressBuffer(size_t input_size,
737 const uint8_t* input_buffer,
738 size_t* encoded_size,
739 uint8_t* encoded_buffer) {
740 int storage_ix = 0;
741 uint8_t* storage = encoded_buffer;
742 WriteBitsPrepareStorage(storage_ix, storage);
743 EncodeSize(input_size, &storage_ix, storage);
744
745 if (input_size == 0) {
746 *encoded_size = (storage_ix + 7) >> 3;
747 return 1;
748 }
749 int input_size_bits = Log2Ceiling(input_size);
750
751 std::vector<Command> all_commands;
752 CreateBackwardReferences(input_buffer, input_size, &all_commands);
753 ComputeDistanceShortCodes(&all_commands);
754
755 std::vector<std::vector<Command> > meta_block_commands;
756 SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
757 &meta_block_commands);
758
759 size_t pos = 0;
760 for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
761 const std::vector<Command>& commands = meta_block_commands[block_idx];
762 bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
763 EncodingParams params;
764 params.num_direct_distance_codes = 12;
765 params.distance_postfix_bits = 1;
766 params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
767 params.distance_context_mode = 1;
768 MetaBlock mb;
769 BuildMetaBlock(params, commands, input_buffer, pos, &mb);
770 StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
771 &pos, &storage_ix, storage);
772 }
773
774 *encoded_size = (storage_ix + 7) >> 3;
775 return 1;
776}
777
778} // namespace brotli