blob: 88e1c4c9aee788c09fd84d34ec65bf91e7306a70 [file] [log] [blame]
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
27#include "./entropy_encode.h"
28#include "./fast_log.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010029#include "./hash.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020030#include "./histogram.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010031#include "./literal_cost.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020032#include "./prefix.h"
33#include "./write_bits.h"
34
35namespace brotli {
36
Roderick Sheeter437bbad2013-11-19 14:32:56 -080037static const int kWindowBits = 22;
38// To make decoding faster, we allow the decoder to write 16 bytes ahead in
39// its ringbuffer, therefore the encoder has to decrease max distance by this
40// amount.
41static const int kDecoderRingBufferWriteAheadSlack = 16;
42static const int kMaxBackwardDistance =
43 (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
44
45static const int kMetaBlockSizeBits = 21;
46static const int kRingBufferBits = 23;
47static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
48
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020049template<int kSize>
50double Entropy(const std::vector<Histogram<kSize> >& histograms) {
51 double retval = 0;
52 for (int i = 0; i < histograms.size(); ++i) {
53 retval += histograms[i].EntropyBitCost();
54 }
55 return retval;
56}
57
Zoltan Szabadka1571db32013-11-15 19:02:17 +010058template<int kSize>
59double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
60 double retval = 0;
61 for (int i = 0; i < histograms.size(); ++i) {
62 retval += PopulationCost(histograms[i]);
63 }
64 return retval;
65}
66
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020067void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
68 std::vector<uint8_t> len_bytes;
Zoltan Szabadka1571db32013-11-15 19:02:17 +010069 do {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020070 len_bytes.push_back(len & 0xff);
71 len >>= 8;
Zoltan Szabadka1571db32013-11-15 19:02:17 +010072 } while (len > 0);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020073 WriteBits(3, len_bytes.size(), storage_ix, storage);
74 for (int i = 0; i < len_bytes.size(); ++i) {
75 WriteBits(8, len_bytes[i], storage_ix, storage);
76 }
77}
78
Zoltan Szabadka1571db32013-11-15 19:02:17 +010079void EncodeMetaBlockLength(size_t meta_block_size,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020080 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +010081 WriteBits(1, 0, storage_ix, storage);
82 int num_bits = Log2Floor(meta_block_size) + 1;
83 WriteBits(3, (num_bits + 3) >> 2, storage_ix, storage);
84 while (num_bits > 0) {
85 WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
86 meta_block_size >>= 4;
87 num_bits -= 4;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020088 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +010089 if (num_bits > 0) {
90 WriteBits(num_bits, meta_block_size, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020091 }
92}
93
94template<int kSize>
95void EntropyEncode(int val, const EntropyCode<kSize>& code,
96 int* storage_ix, uint8_t* storage) {
97 if (code.count_ <= 1) {
98 return;
99 };
100 WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
101}
102
103void StoreHuffmanTreeOfHuffmanTreeToBitMask(
104 const uint8_t* code_length_bitdepth,
105 int* storage_ix, uint8_t* storage) {
106 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100107 1, 2, 3, 4, 0, 17, 18, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200108 };
109 // Throw away trailing zeros:
110 int codes_to_store = kCodeLengthCodes;
111 for (; codes_to_store > 4; --codes_to_store) {
112 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
113 break;
114 }
115 }
116 WriteBits(4, codes_to_store - 4, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100117 const int skip_two_first =
118 code_length_bitdepth[kStorageOrder[0]] == 0 &&
119 code_length_bitdepth[kStorageOrder[1]] == 0;
120 WriteBits(1, skip_two_first, storage_ix, storage);
121
122 for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
123 uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
124 uint8_t bits[] = { 0, 7, 3, 1, 2, 15 };
125 int v = code_length_bitdepth[kStorageOrder[i]];
126 WriteBits(len[v], bits[v], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200127 }
128}
129
130void StoreHuffmanTreeToBitMask(
131 const uint8_t* huffman_tree,
132 const uint8_t* huffman_tree_extra_bits,
133 const int huffman_tree_size,
134 const EntropyCode<kCodeLengthCodes>& entropy,
135 int* storage_ix, uint8_t* storage) {
136 for (int i = 0; i < huffman_tree_size; ++i) {
137 const int ix = huffman_tree[i];
138 const int extra_bits = huffman_tree_extra_bits[i];
139 EntropyEncode(ix, entropy, storage_ix, storage);
140 switch (ix) {
141 case 16:
142 WriteBits(2, extra_bits, storage_ix, storage);
143 break;
144 case 17:
145 WriteBits(3, extra_bits, storage_ix, storage);
146 break;
147 case 18:
148 WriteBits(7, extra_bits, storage_ix, storage);
149 break;
150 }
151 }
152}
153
154template<int kSize>
155void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
156 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100157 const uint8_t *depth = &code.depth_[0];
158 int max_bits_counter = alphabet_size - 1;
159 int max_bits = 0;
160 while (max_bits_counter) {
161 max_bits_counter >>= 1;
162 ++max_bits;
163 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200164 if (code.count_ == 0) { // emit minimal tree for empty cases
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100165 // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
166 WriteBits(3 + max_bits, 0x01, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200167 return;
168 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100169 if (code.count_ <= 4) {
170 int symbols[4];
171 // Quadratic sort.
172 int k, j;
173 for (k = 0; k < code.count_; ++k) {
174 symbols[k] = code.symbols_[k];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200175 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100176 for (k = 0; k < code.count_; ++k) {
177 for (j = k + 1; j < code.count_; ++j) {
178 if (depth[symbols[j]] < depth[symbols[k]]) {
179 int t = symbols[k];
180 symbols[k] = symbols[j];
181 symbols[j] = t;
182 }
183 }
184 }
185 // Small tree marker to encode 1-4 symbols.
186 WriteBits(1, 1, storage_ix, storage);
187 WriteBits(2, code.count_ - 1, storage_ix, storage);
188 for (int i = 0; i < code.count_; ++i) {
189 WriteBits(max_bits, symbols[i], storage_ix, storage);
190 }
191 if (code.count_ == 4) {
192 if (depth[symbols[0]] == 2 &&
193 depth[symbols[1]] == 2 &&
194 depth[symbols[2]] == 2 &&
195 depth[symbols[3]] == 2) {
196 WriteBits(1, 0, storage_ix, storage);
197 } else {
198 WriteBits(1, 1, storage_ix, storage);
199 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200200 }
201 return;
202 }
203 WriteBits(1, 0, storage_ix, storage);
204
205 uint8_t huffman_tree[kSize];
206 uint8_t huffman_tree_extra_bits[kSize];
207 int huffman_tree_size = 0;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100208 WriteHuffmanTree(depth,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200209 alphabet_size,
210 &huffman_tree[0],
211 &huffman_tree_extra_bits[0],
212 &huffman_tree_size);
213 Histogram<kCodeLengthCodes> huffman_tree_histogram;
214 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
215 for (int i = 0; i < huffman_tree_size; ++i) {
216 huffman_tree_histogram.Add(huffman_tree[i]);
217 }
218 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100219 BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200220 &huffman_tree_entropy);
221 Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
222 uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
223 while (*last_code == 0 || *last_code >= 17) {
224 trimmed_histogram.Remove(*last_code--);
225 }
226 int trimmed_size = trimmed_histogram.total_count_;
227 bool write_length = false;
228 if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
229 EntropyCode<kCodeLengthCodes> trimmed_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100230 BuildEntropyCode(trimmed_histogram, 5, kCodeLengthCodes, &trimmed_entropy);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200231 int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
232 huffman_tree_entropy);
233 int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
234 trimmed_entropy);;
235 const int nbits = Log2Ceiling(trimmed_size - 1);
236 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
237 if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
238 write_length = true;
239 huffman_tree_size = trimmed_size;
240 huffman_tree_entropy = trimmed_entropy;
241 }
242 }
243
244 StoreHuffmanTreeOfHuffmanTreeToBitMask(
245 &huffman_tree_entropy.depth_[0], storage_ix, storage);
246 WriteBits(1, write_length, storage_ix, storage);
247 if (write_length) {
248 const int nbits = Log2Ceiling(huffman_tree_size - 1);
249 const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
250 WriteBits(3, nbitpairs - 1, storage_ix, storage);
251 WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
252 }
253 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
254 huffman_tree_size, huffman_tree_entropy,
255 storage_ix, storage);
256}
257
258template<int kSize>
259void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
260 int alphabet_size,
261 int* storage_ix, uint8_t* storage) {
262 for (int i = 0; i < codes.size(); ++i) {
263 StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
264 }
265}
266
267void EncodeCommand(const Command& cmd,
268 const EntropyCodeCommand& entropy,
269 int* storage_ix, uint8_t* storage) {
270 int code = cmd.command_prefix_;
271 EntropyEncode(code, entropy, storage_ix, storage);
272 if (code >= 128) {
273 code -= 128;
274 }
275 int insert_extra_bits = InsertLengthExtraBits(code);
276 uint64_t insert_extra_bits_val =
277 cmd.insert_length_ - InsertLengthOffset(code);
278 int copy_extra_bits = CopyLengthExtraBits(code);
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800279 uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200280 if (insert_extra_bits > 0) {
281 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
282 }
283 if (copy_extra_bits > 0) {
284 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
285 }
286}
287
288void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
289 int* storage_ix, uint8_t* storage) {
290 int code = cmd.distance_prefix_;
291 int extra_bits = cmd.distance_extra_bits_;
292 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
293 EntropyEncode(code, entropy, storage_ix, storage);
294 if (extra_bits > 0) {
295 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
296 }
297}
298
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100299void ComputeDistanceShortCodes(std::vector<Command>* cmds,
300 int* dist_ringbuffer,
301 size_t* ringbuffer_idx) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200302 static const int kIndexOffset[16] = {
303 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
304 };
305 static const int kValueOffset[16] = {
306 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
307 };
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200308 for (int i = 0; i < cmds->size(); ++i) {
309 int cur_dist = (*cmds)[i].copy_distance_;
310 if (cur_dist == 0) break;
311 int dist_code = cur_dist + 16;
312 for (int k = 0; k < 16; ++k) {
313 // Only accept more popular choices.
314 if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
315 // Typically unpopular ranges, don't replace a short distance
316 // with them.
317 continue;
318 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100319 int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200320 kValueOffset[k]);
321 if (cur_dist == comp) {
322 dist_code = k + 1;
323 break;
324 }
325 }
326 if (dist_code > 1) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100327 dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
328 ++(*ringbuffer_idx);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200329 }
330 (*cmds)[i].distance_code_ = dist_code;
331 }
332}
333
334void ComputeCommandPrefixes(std::vector<Command>* cmds,
335 int num_direct_distance_codes,
336 int distance_postfix_bits) {
337 for (int i = 0; i < cmds->size(); ++i) {
338 Command* cmd = &(*cmds)[i];
339 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800340 cmd->copy_length_code_);
341 if (cmd->copy_length_code_ > 0) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200342 PrefixEncodeCopyDistance(cmd->distance_code_,
343 num_direct_distance_codes,
344 distance_postfix_bits,
345 &cmd->distance_prefix_,
346 &cmd->distance_extra_bits_,
347 &cmd->distance_extra_bits_value_);
348 }
349 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
350 cmd->distance_prefix_ = 0xffff;
351 } else {
352 cmd->command_prefix_ += 128;
353 }
354 }
355}
356
357int IndexOf(const std::vector<int>& v, int value) {
358 for (int i = 0; i < v.size(); ++i) {
359 if (v[i] == value) return i;
360 }
361 return -1;
362}
363
364void MoveToFront(std::vector<int>* v, int index) {
365 int value = (*v)[index];
366 for (int i = index; i > 0; --i) {
367 (*v)[i] = (*v)[i - 1];
368 }
369 (*v)[0] = value;
370}
371
372std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
373 if (v.empty()) return v;
374 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
375 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
376 std::vector<int> result(v.size());
377 for (int i = 0; i < v.size(); ++i) {
378 int index = IndexOf(mtf, v[i]);
379 result[i] = index;
380 MoveToFront(&mtf, index);
381 }
382 return result;
383}
384
385// Finds runs of zeros in v_in and replaces them with a prefix code of the run
386// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
387// shifted by *max_length_prefix. Will not create prefix codes bigger than the
388// initial value of *max_run_length_prefix. The prefix code of run length L is
389// simply Log2Floor(L) and the number of extra bits is the same as the prefix
390// code.
391void RunLengthCodeZeros(const std::vector<int>& v_in,
392 int* max_run_length_prefix,
393 std::vector<int>* v_out,
394 std::vector<int>* extra_bits) {
395 int max_reps = 0;
396 for (int i = 0; i < v_in.size();) {
397 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
398 int reps = 0;
399 for (; i < v_in.size() && v_in[i] == 0; ++i) {
400 ++reps;
401 }
402 max_reps = std::max(reps, max_reps);
403 }
404 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
405 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
406 for (int i = 0; i < v_in.size();) {
407 if (v_in[i] != 0) {
408 v_out->push_back(v_in[i] + *max_run_length_prefix);
409 extra_bits->push_back(0);
410 ++i;
411 } else {
412 int reps = 1;
413 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
414 ++reps;
415 }
416 i += reps;
417 while (reps) {
418 if (reps < (2 << *max_run_length_prefix)) {
419 int run_length_prefix = Log2Floor(reps);
420 v_out->push_back(run_length_prefix);
421 extra_bits->push_back(reps - (1 << run_length_prefix));
422 break;
423 } else {
424 v_out->push_back(*max_run_length_prefix);
425 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
426 reps -= (2 << *max_run_length_prefix) - 1;
427 }
428 }
429 }
430 }
431}
432
433// Returns a maximum zero-run-length-prefix value such that run-length coding
434// zeros in v with this maximum prefix value and then encoding the resulting
435// histogram and entropy-coding v produces the least amount of bits.
436int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
437 int min_cost = std::numeric_limits<int>::max();
438 int best_max_prefix = 0;
439 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
440 std::vector<int> rle_symbols;
441 std::vector<int> extra_bits;
442 int max_run_length_prefix = max_prefix;
443 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
444 if (max_run_length_prefix < max_prefix) break;
445 HistogramLiteral histogram;
446 for (int i = 0; i < rle_symbols.size(); ++i) {
447 histogram.Add(rle_symbols[i]);
448 }
449 int bit_cost = PopulationCost(histogram);
450 if (max_prefix > 0) {
451 bit_cost += 4;
452 }
453 for (int i = 1; i <= max_prefix; ++i) {
454 bit_cost += histogram.data_[i] * i; // extra bits
455 }
456 if (bit_cost < min_cost) {
457 min_cost = bit_cost;
458 best_max_prefix = max_prefix;
459 }
460 }
461 return best_max_prefix;
462}
463
464void EncodeContextMap(const std::vector<int>& context_map,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200465 int num_clusters,
466 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200467 WriteBits(8, num_clusters - 1, storage_ix, storage);
468
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800469 if (num_clusters == 1) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200470 return;
471 }
472
473 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
474 std::vector<int> rle_symbols;
475 std::vector<int> extra_bits;
476 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
477 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
478 &rle_symbols, &extra_bits);
479 HistogramLiteral symbol_histogram;
480 for (int i = 0; i < rle_symbols.size(); ++i) {
481 symbol_histogram.Add(rle_symbols[i]);
482 }
483 EntropyCodeLiteral symbol_code;
484 BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
485 &symbol_code);
486 bool use_rle = max_run_length_prefix > 0;
487 WriteBits(1, use_rle, storage_ix, storage);
488 if (use_rle) {
489 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
490 }
491 StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
492 storage_ix, storage);
493 for (int i = 0; i < rle_symbols.size(); ++i) {
494 EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
495 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
496 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
497 }
498 }
499 WriteBits(1, 1, storage_ix, storage); // use move-to-front
500}
501
502template<int kSize>
503void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
504 int alphabet_size,
505 std::vector<EntropyCode<kSize> >* entropy_codes) {
506 entropy_codes->resize(histograms.size());
507 for (int i = 0; i < histograms.size(); ++i) {
508 BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
509 }
510}
511
512struct BlockSplitCode {
513 EntropyCodeLiteral block_type_code;
514 EntropyCodeBlockLength block_len_code;
515};
516
517void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
518 int length,
519 int* storage_ix, uint8_t* storage) {
520 int len_code = BlockLengthPrefix(length);
521 int extra_bits = BlockLengthExtraBits(len_code);
522 int extra_bits_value = length - BlockLengthOffset(len_code);
523 EntropyEncode(len_code, entropy, storage_ix, storage);
524
525 if (extra_bits > 0) {
526 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
527 }
528}
529
530void ComputeBlockTypeShortCodes(BlockSplit* split) {
531 if (split->num_types_ <= 1) {
532 split->num_types_ = 1;
533 return;
534 }
535 int ringbuffer[2] = { 0, 1 };
536 size_t index = 0;
537 for (int i = 0; i < split->types_.size(); ++i) {
538 int type = split->types_[i];
539 int type_code;
540 if (type == ringbuffer[index & 1]) {
541 type_code = 0;
542 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
543 type_code = 1;
544 } else {
545 type_code = type + 2;
546 }
547 ringbuffer[index & 1] = type;
548 ++index;
549 split->type_codes_.push_back(type_code);
550 }
551}
552
553void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
554 BlockSplitCode* code,
555 int* storage_ix, uint8_t* storage) {
556 if (split.num_types_ <= 1) {
557 WriteBits(1, 0, storage_ix, storage);
558 return;
559 }
560 WriteBits(1, 1, storage_ix, storage);
561 HistogramLiteral type_histo;
562 for (int i = 0; i < split.type_codes_.size(); ++i) {
563 type_histo.Add(split.type_codes_[i]);
564 }
565 BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
566 &code->block_type_code);
567 HistogramBlockLength length_histo;
568 for (int i = 0; i < split.lengths_.size(); ++i) {
569 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
570 }
571 BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
572 &code->block_len_code);
573 WriteBits(8, split.num_types_ - 1, storage_ix, storage);
574 StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
575 storage_ix, storage);
576 StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
577 storage_ix, storage);
578 EncodeBlockLength(code->block_len_code, split.lengths_[0],
579 storage_ix, storage);
580}
581
582void MoveAndEncode(const BlockSplitCode& code,
583 BlockSplitIterator* it,
584 int* storage_ix, uint8_t* storage) {
585 if (it->length_ == 0) {
586 ++it->idx_;
587 it->type_ = it->split_.types_[it->idx_];
588 it->length_ = it->split_.lengths_[it->idx_];
589 uint8_t type_code = it->split_.type_codes_[it->idx_];
590 EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
591 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
592 }
593 --it->length_;
594}
595
596struct EncodingParams {
597 int num_direct_distance_codes;
598 int distance_postfix_bits;
599 int literal_context_mode;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200600};
601
602struct MetaBlock {
603 std::vector<Command> cmds;
604 EncodingParams params;
605 BlockSplit literal_split;
606 BlockSplit command_split;
607 BlockSplit distance_split;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100608 std::vector<int> literal_context_modes;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200609 std::vector<int> literal_context_map;
610 std::vector<int> distance_context_map;
611 std::vector<HistogramLiteral> literal_histograms;
612 std::vector<HistogramCommand> command_histograms;
613 std::vector<HistogramDistance> distance_histograms;
614};
615
616void BuildMetaBlock(const EncodingParams& params,
617 const std::vector<Command>& cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100618 const uint8_t* ringbuffer,
619 const size_t pos,
620 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200621 MetaBlock* mb) {
622 mb->cmds = cmds;
623 mb->params = params;
624 ComputeCommandPrefixes(&mb->cmds,
625 mb->params.num_direct_distance_codes,
626 mb->params.distance_postfix_bits);
627 SplitBlock(mb->cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100628 &ringbuffer[pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200629 &mb->literal_split,
630 &mb->command_split,
631 &mb->distance_split);
632 ComputeBlockTypeShortCodes(&mb->literal_split);
633 ComputeBlockTypeShortCodes(&mb->command_split);
634 ComputeBlockTypeShortCodes(&mb->distance_split);
635
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100636 mb->literal_context_modes.resize(mb->literal_split.num_types_,
637 mb->params.literal_context_mode);
638
639
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200640 int num_literal_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100641 mb->literal_split.num_types_ << kLiteralContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200642 int num_distance_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100643 mb->distance_split.num_types_ << kDistanceContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200644 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
645 mb->command_histograms.resize(mb->command_split.num_types_);
646 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
647 BuildHistograms(mb->cmds,
648 mb->literal_split,
649 mb->command_split,
650 mb->distance_split,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100651 ringbuffer,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200652 pos,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100653 mask,
654 mb->literal_context_modes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200655 &literal_histograms,
656 &mb->command_histograms,
657 &distance_histograms);
658
659 // Histogram ids need to fit in one byte and there are 16 ids reserved for
660 // run length codes, which leaves a maximum number of 240 histograms.
661 static const int kMaxNumberOfHistograms = 240;
662
663 mb->literal_histograms = literal_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100664 ClusterHistograms(literal_histograms,
665 1 << kLiteralContextBits,
666 mb->literal_split.num_types_,
667 kMaxNumberOfHistograms,
668 &mb->literal_histograms,
669 &mb->literal_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200670
671 mb->distance_histograms = distance_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100672 ClusterHistograms(distance_histograms,
673 1 << kDistanceContextBits,
674 mb->distance_split.num_types_,
675 kMaxNumberOfHistograms,
676 &mb->distance_histograms,
677 &mb->distance_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200678}
679
680size_t MetaBlockLength(const std::vector<Command>& cmds) {
681 size_t length = 0;
682 for (int i = 0; i < cmds.size(); ++i) {
683 const Command& cmd = cmds[i];
684 length += cmd.insert_length_ + cmd.copy_length_;
685 }
686 return length;
687}
688
689void StoreMetaBlock(const MetaBlock& mb,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100690 const uint8_t* ringbuffer,
691 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200692 size_t* pos,
693 int* storage_ix, uint8_t* storage) {
694 size_t length = MetaBlockLength(mb.cmds);
695 const size_t end_pos = *pos + length;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100696 EncodeMetaBlockLength(length - 1,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200697 storage_ix, storage);
698 BlockSplitCode literal_split_code;
699 BlockSplitCode command_split_code;
700 BlockSplitCode distance_split_code;
701 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
702 storage_ix, storage);
703 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
704 storage_ix, storage);
705 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
706 storage_ix, storage);
707 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
708 WriteBits(4,
709 mb.params.num_direct_distance_codes >>
710 mb.params.distance_postfix_bits, storage_ix, storage);
711 int num_distance_codes =
712 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
713 (48 << mb.params.distance_postfix_bits);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100714 for (int i = 0; i < mb.literal_split.num_types_; ++i) {
715 WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
716 }
717 EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
718 EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200719 std::vector<EntropyCodeLiteral> literal_codes;
720 std::vector<EntropyCodeCommand> command_codes;
721 std::vector<EntropyCodeDistance> distance_codes;
722 BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
723 BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
724 &command_codes);
725 BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
726 &distance_codes);
727 StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
728 StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
729 StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
730 BlockSplitIterator literal_it(mb.literal_split);
731 BlockSplitIterator command_it(mb.command_split);
732 BlockSplitIterator distance_it(mb.distance_split);
733 for (int i = 0; i < mb.cmds.size(); ++i) {
734 const Command& cmd = mb.cmds[i];
735 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
736 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
737 for (int j = 0; j < cmd.insert_length_; ++j) {
738 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
739 int histogram_idx = literal_it.type_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100740 uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
741 uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
742 int context = ((literal_it.type_ << kLiteralContextBits) +
743 Context(prev_byte, prev_byte2,
744 mb.literal_context_modes[literal_it.type_]));
745 histogram_idx = mb.literal_context_map[context];
746 EntropyEncode(ringbuffer[*pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200747 literal_codes[histogram_idx], storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100748 ++(*pos);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200749 }
750 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
751 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100752 int context = (distance_it.type_ << 2) +
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800753 ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
754 int histogram_index = mb.distance_context_map[context];
755 size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200756 EncodeCopyDistance(cmd, distance_codes[histogram_index],
757 storage_ix, storage);
758 }
759 *pos += cmd.copy_length_;
760 }
761}
762
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100763BrotliCompressor::BrotliCompressor()
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800764 : window_bits_(kWindowBits),
765 hasher_(new Hasher),
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100766 dist_ringbuffer_idx_(0),
767 input_pos_(0),
768 ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
769 literal_cost_(1 << kRingBufferBits),
770 storage_ix_(0),
771 storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800772 dist_ringbuffer_[0] = 4;
773 dist_ringbuffer_[1] = 11;
774 dist_ringbuffer_[2] = 15;
775 dist_ringbuffer_[3] = 16;
776 storage_[0] = 0;
777}
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100778
779BrotliCompressor::~BrotliCompressor() {
780 delete hasher_;
781 delete[] storage_;
782}
783
784void BrotliCompressor::WriteStreamHeader() {
785 // Don't encode input size.
786 WriteBits(3, 0, &storage_ix_, storage_);
787 // Encode window size.
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800788 if (window_bits_ == 16) {
789 WriteBits(1, 0, &storage_ix_, storage_);
790 } else {
791 WriteBits(1, 1, &storage_ix_, storage_);
792 WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
793 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100794}
795
796void BrotliCompressor::WriteMetaBlock(const size_t input_size,
797 const uint8_t* input_buffer,
798 size_t* encoded_size,
799 uint8_t* encoded_buffer) {
800 ringbuffer_.Write(input_buffer, input_size);
801 EstimateBitCostsForLiterals(input_pos_, input_size,
802 kRingBufferMask, ringbuffer_.start(),
803 &literal_cost_[0]);
804 std::vector<Command> commands;
805 CreateBackwardReferences(input_size, input_pos_,
806 ringbuffer_.start(),
807 &literal_cost_[0],
808 kRingBufferMask, kMaxBackwardDistance,
809 hasher_,
810 &commands);
811 ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
812 &dist_ringbuffer_idx_);
813 EncodingParams params;
814 params.num_direct_distance_codes = 12;
815 params.distance_postfix_bits = 1;
816 params.literal_context_mode = CONTEXT_SIGNED;
817 MetaBlock mb;
818 BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
819 kRingBufferMask, &mb);
820 StoreMetaBlock(mb, ringbuffer_.start(), kRingBufferMask,
821 &input_pos_, &storage_ix_, storage_);
822 size_t output_size = storage_ix_ >> 3;
823 memcpy(encoded_buffer, storage_, output_size);
824 *encoded_size = output_size;
825 storage_ix_ -= output_size << 3;
826 storage_[storage_ix_ >> 3] = storage_[output_size];
827}
828
829void BrotliCompressor::FinishStream(
830 size_t* encoded_size, uint8_t* encoded_buffer) {
831 WriteBits(1, 1, &storage_ix_, storage_);
832 *encoded_size = (storage_ix_ + 7) >> 3;
833 memcpy(encoded_buffer, storage_, *encoded_size);
834}
835
836
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200837int BrotliCompressBuffer(size_t input_size,
838 const uint8_t* input_buffer,
839 size_t* encoded_size,
840 uint8_t* encoded_buffer) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200841 if (input_size == 0) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100842 encoded_buffer[0] = 1;
843 encoded_buffer[1] = 0;
844 *encoded_size = 2;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200845 return 1;
846 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200847
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100848 BrotliCompressor compressor;
849 compressor.WriteStreamHeader();
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200850
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100851 const int max_block_size = 1 << kMetaBlockSizeBits;
852 size_t max_output_size = *encoded_size;
853 const uint8_t* input_end = input_buffer + input_size;
854 *encoded_size = 0;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200855
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100856 while (input_buffer < input_end) {
857 int block_size = max_block_size;
858 if (block_size >= input_end - input_buffer) {
859 block_size = input_end - input_buffer;
860 }
861 size_t output_size = max_output_size;
862 compressor.WriteMetaBlock(block_size, input_buffer,
863 &output_size, &encoded_buffer[*encoded_size]);
864 input_buffer += block_size;
865 *encoded_size += output_size;
866 max_output_size -= output_size;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200867 }
868
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100869 size_t output_size = max_output_size;
870 compressor.FinishStream(&output_size, &encoded_buffer[*encoded_size]);
871 *encoded_size += output_size;
872
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200873 return 1;
874}
875
876} // namespace brotli