blob: 7d54dbece637a40be6646ae31e0a6f126cd78597 [file] [log] [blame]
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
27#include "./entropy_encode.h"
28#include "./fast_log.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010029#include "./hash.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020030#include "./histogram.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010031#include "./literal_cost.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020032#include "./prefix.h"
33#include "./write_bits.h"
34
35namespace brotli {
36
Roderick Sheeter437bbad2013-11-19 14:32:56 -080037static const int kWindowBits = 22;
38// To make decoding faster, we allow the decoder to write 16 bytes ahead in
39// its ringbuffer, therefore the encoder has to decrease max distance by this
40// amount.
41static const int kDecoderRingBufferWriteAheadSlack = 16;
42static const int kMaxBackwardDistance =
43 (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
44
45static const int kMetaBlockSizeBits = 21;
46static const int kRingBufferBits = 23;
47static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
48
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020049template<int kSize>
50double Entropy(const std::vector<Histogram<kSize> >& histograms) {
51 double retval = 0;
52 for (int i = 0; i < histograms.size(); ++i) {
53 retval += histograms[i].EntropyBitCost();
54 }
55 return retval;
56}
57
Zoltan Szabadka1571db32013-11-15 19:02:17 +010058template<int kSize>
59double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
60 double retval = 0;
61 for (int i = 0; i < histograms.size(); ++i) {
62 retval += PopulationCost(histograms[i]);
63 }
64 return retval;
65}
66
Zoltan Szabadkae7094912013-12-12 13:18:04 +010067void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
68 if (n == 0) {
69 WriteBits(1, 0, storage_ix, storage);
70 } else {
71 WriteBits(1, 1, storage_ix, storage);
72 int nbits = Log2Floor(n);
73 WriteBits(3, nbits, storage_ix, storage);
74 if (nbits > 0) {
75 WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
76 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020077 }
78}
79
Zoltan Szabadka1571db32013-11-15 19:02:17 +010080void EncodeMetaBlockLength(size_t meta_block_size,
Zoltan Szabadkae7094912013-12-12 13:18:04 +010081 bool is_last,
82 bool is_uncompressed,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020083 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +010084 WriteBits(1, is_last, storage_ix, storage);
85 if (is_last) {
86 if (meta_block_size == 0) {
87 WriteBits(1, 1, storage_ix, storage);
88 return;
89 }
90 WriteBits(1, 0, storage_ix, storage);
91 }
92 --meta_block_size;
Zoltan Szabadka1571db32013-11-15 19:02:17 +010093 int num_bits = Log2Floor(meta_block_size) + 1;
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +010094 if (num_bits < 16) {
95 num_bits = 16;
96 }
97 WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +010098 while (num_bits > 0) {
99 WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
100 meta_block_size >>= 4;
101 num_bits -= 4;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200102 }
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100103 if (!is_last) {
104 WriteBits(1, is_uncompressed, storage_ix, storage);
105 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200106}
107
108template<int kSize>
109void EntropyEncode(int val, const EntropyCode<kSize>& code,
110 int* storage_ix, uint8_t* storage) {
111 if (code.count_ <= 1) {
112 return;
113 };
114 WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
115}
116
117void StoreHuffmanTreeOfHuffmanTreeToBitMask(
118 const uint8_t* code_length_bitdepth,
119 int* storage_ix, uint8_t* storage) {
120 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100121 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200122 };
123 // Throw away trailing zeros:
124 int codes_to_store = kCodeLengthCodes;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100125 for (; codes_to_store > 3; --codes_to_store) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200126 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
127 break;
128 }
129 }
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100130 WriteBits(4, codes_to_store - 3, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100131 const int skip_two_first =
132 code_length_bitdepth[kStorageOrder[0]] == 0 &&
133 code_length_bitdepth[kStorageOrder[1]] == 0;
134 WriteBits(1, skip_two_first, storage_ix, storage);
135
136 for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
137 uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100138 uint8_t bits[] = { 0, 5, 1, 3, 2, 13 };
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100139 int v = code_length_bitdepth[kStorageOrder[i]];
140 WriteBits(len[v], bits[v], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200141 }
142}
143
144void StoreHuffmanTreeToBitMask(
145 const uint8_t* huffman_tree,
146 const uint8_t* huffman_tree_extra_bits,
147 const int huffman_tree_size,
148 const EntropyCode<kCodeLengthCodes>& entropy,
149 int* storage_ix, uint8_t* storage) {
150 for (int i = 0; i < huffman_tree_size; ++i) {
151 const int ix = huffman_tree[i];
152 const int extra_bits = huffman_tree_extra_bits[i];
153 EntropyEncode(ix, entropy, storage_ix, storage);
154 switch (ix) {
155 case 16:
156 WriteBits(2, extra_bits, storage_ix, storage);
157 break;
158 case 17:
159 WriteBits(3, extra_bits, storage_ix, storage);
160 break;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200161 }
162 }
163}
164
165template<int kSize>
166void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
167 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100168 const uint8_t *depth = &code.depth_[0];
169 int max_bits_counter = alphabet_size - 1;
170 int max_bits = 0;
171 while (max_bits_counter) {
172 max_bits_counter >>= 1;
173 ++max_bits;
174 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200175 if (code.count_ == 0) { // emit minimal tree for empty cases
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100176 // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
177 WriteBits(3 + max_bits, 0x01, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200178 return;
179 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100180 if (code.count_ <= 4) {
181 int symbols[4];
182 // Quadratic sort.
183 int k, j;
184 for (k = 0; k < code.count_; ++k) {
185 symbols[k] = code.symbols_[k];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200186 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100187 for (k = 0; k < code.count_; ++k) {
188 for (j = k + 1; j < code.count_; ++j) {
189 if (depth[symbols[j]] < depth[symbols[k]]) {
190 int t = symbols[k];
191 symbols[k] = symbols[j];
192 symbols[j] = t;
193 }
194 }
195 }
196 // Small tree marker to encode 1-4 symbols.
197 WriteBits(1, 1, storage_ix, storage);
198 WriteBits(2, code.count_ - 1, storage_ix, storage);
199 for (int i = 0; i < code.count_; ++i) {
200 WriteBits(max_bits, symbols[i], storage_ix, storage);
201 }
202 if (code.count_ == 4) {
203 if (depth[symbols[0]] == 2 &&
204 depth[symbols[1]] == 2 &&
205 depth[symbols[2]] == 2 &&
206 depth[symbols[3]] == 2) {
207 WriteBits(1, 0, storage_ix, storage);
208 } else {
209 WriteBits(1, 1, storage_ix, storage);
210 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200211 }
212 return;
213 }
214 WriteBits(1, 0, storage_ix, storage);
215
216 uint8_t huffman_tree[kSize];
217 uint8_t huffman_tree_extra_bits[kSize];
218 int huffman_tree_size = 0;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100219 WriteHuffmanTree(depth,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200220 alphabet_size,
221 &huffman_tree[0],
222 &huffman_tree_extra_bits[0],
223 &huffman_tree_size);
224 Histogram<kCodeLengthCodes> huffman_tree_histogram;
225 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
226 for (int i = 0; i < huffman_tree_size; ++i) {
227 huffman_tree_histogram.Add(huffman_tree[i]);
228 }
229 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100230 BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200231 &huffman_tree_entropy);
232 Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
233 uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
234 while (*last_code == 0 || *last_code >= 17) {
235 trimmed_histogram.Remove(*last_code--);
236 }
237 int trimmed_size = trimmed_histogram.total_count_;
238 bool write_length = false;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100239 if (trimmed_size >= 4 && trimmed_size <= 195 &&
240 trimmed_size < huffman_tree_size) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200241 EntropyCode<kCodeLengthCodes> trimmed_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100242 BuildEntropyCode(trimmed_histogram, 5, kCodeLengthCodes, &trimmed_entropy);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200243 int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
244 huffman_tree_entropy);
245 int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
246 trimmed_entropy);;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100247 trimmed_bit_cost += (trimmed_size < 68 ? 7 : 8);
248 if (trimmed_bit_cost < huffman_bit_cost) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200249 write_length = true;
250 huffman_tree_size = trimmed_size;
251 huffman_tree_entropy = trimmed_entropy;
252 }
253 }
254
255 StoreHuffmanTreeOfHuffmanTreeToBitMask(
256 &huffman_tree_entropy.depth_[0], storage_ix, storage);
257 WriteBits(1, write_length, storage_ix, storage);
258 if (write_length) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100259 WriteBits(1, huffman_tree_size >= 68, storage_ix, storage);
260 if (huffman_tree_size < 68) {
261 WriteBits(6, huffman_tree_size - 4, storage_ix, storage);
262 } else {
263 WriteBits(7, huffman_tree_size - 68, storage_ix, storage);
264 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200265 }
266 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
267 huffman_tree_size, huffman_tree_entropy,
268 storage_ix, storage);
269}
270
271template<int kSize>
272void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
273 int alphabet_size,
274 int* storage_ix, uint8_t* storage) {
275 for (int i = 0; i < codes.size(); ++i) {
276 StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
277 }
278}
279
280void EncodeCommand(const Command& cmd,
281 const EntropyCodeCommand& entropy,
282 int* storage_ix, uint8_t* storage) {
283 int code = cmd.command_prefix_;
284 EntropyEncode(code, entropy, storage_ix, storage);
285 if (code >= 128) {
286 code -= 128;
287 }
288 int insert_extra_bits = InsertLengthExtraBits(code);
289 uint64_t insert_extra_bits_val =
290 cmd.insert_length_ - InsertLengthOffset(code);
291 int copy_extra_bits = CopyLengthExtraBits(code);
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800292 uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200293 if (insert_extra_bits > 0) {
294 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
295 }
296 if (copy_extra_bits > 0) {
297 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
298 }
299}
300
301void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
302 int* storage_ix, uint8_t* storage) {
303 int code = cmd.distance_prefix_;
304 int extra_bits = cmd.distance_extra_bits_;
305 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
306 EntropyEncode(code, entropy, storage_ix, storage);
307 if (extra_bits > 0) {
308 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
309 }
310}
311
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100312void ComputeDistanceShortCodes(std::vector<Command>* cmds,
313 int* dist_ringbuffer,
314 size_t* ringbuffer_idx) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200315 static const int kIndexOffset[16] = {
316 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
317 };
318 static const int kValueOffset[16] = {
319 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
320 };
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200321 for (int i = 0; i < cmds->size(); ++i) {
322 int cur_dist = (*cmds)[i].copy_distance_;
323 if (cur_dist == 0) break;
324 int dist_code = cur_dist + 16;
325 for (int k = 0; k < 16; ++k) {
326 // Only accept more popular choices.
327 if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
328 // Typically unpopular ranges, don't replace a short distance
329 // with them.
330 continue;
331 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100332 int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200333 kValueOffset[k]);
334 if (cur_dist == comp) {
335 dist_code = k + 1;
336 break;
337 }
338 }
339 if (dist_code > 1) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100340 dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
341 ++(*ringbuffer_idx);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200342 }
343 (*cmds)[i].distance_code_ = dist_code;
344 }
345}
346
347void ComputeCommandPrefixes(std::vector<Command>* cmds,
348 int num_direct_distance_codes,
349 int distance_postfix_bits) {
350 for (int i = 0; i < cmds->size(); ++i) {
351 Command* cmd = &(*cmds)[i];
352 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800353 cmd->copy_length_code_);
354 if (cmd->copy_length_code_ > 0) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200355 PrefixEncodeCopyDistance(cmd->distance_code_,
356 num_direct_distance_codes,
357 distance_postfix_bits,
358 &cmd->distance_prefix_,
359 &cmd->distance_extra_bits_,
360 &cmd->distance_extra_bits_value_);
361 }
362 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
363 cmd->distance_prefix_ = 0xffff;
364 } else {
365 cmd->command_prefix_ += 128;
366 }
367 }
368}
369
370int IndexOf(const std::vector<int>& v, int value) {
371 for (int i = 0; i < v.size(); ++i) {
372 if (v[i] == value) return i;
373 }
374 return -1;
375}
376
377void MoveToFront(std::vector<int>* v, int index) {
378 int value = (*v)[index];
379 for (int i = index; i > 0; --i) {
380 (*v)[i] = (*v)[i - 1];
381 }
382 (*v)[0] = value;
383}
384
385std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
386 if (v.empty()) return v;
387 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
388 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
389 std::vector<int> result(v.size());
390 for (int i = 0; i < v.size(); ++i) {
391 int index = IndexOf(mtf, v[i]);
392 result[i] = index;
393 MoveToFront(&mtf, index);
394 }
395 return result;
396}
397
398// Finds runs of zeros in v_in and replaces them with a prefix code of the run
399// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
400// shifted by *max_length_prefix. Will not create prefix codes bigger than the
401// initial value of *max_run_length_prefix. The prefix code of run length L is
402// simply Log2Floor(L) and the number of extra bits is the same as the prefix
403// code.
404void RunLengthCodeZeros(const std::vector<int>& v_in,
405 int* max_run_length_prefix,
406 std::vector<int>* v_out,
407 std::vector<int>* extra_bits) {
408 int max_reps = 0;
409 for (int i = 0; i < v_in.size();) {
410 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
411 int reps = 0;
412 for (; i < v_in.size() && v_in[i] == 0; ++i) {
413 ++reps;
414 }
415 max_reps = std::max(reps, max_reps);
416 }
417 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
418 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
419 for (int i = 0; i < v_in.size();) {
420 if (v_in[i] != 0) {
421 v_out->push_back(v_in[i] + *max_run_length_prefix);
422 extra_bits->push_back(0);
423 ++i;
424 } else {
425 int reps = 1;
426 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
427 ++reps;
428 }
429 i += reps;
430 while (reps) {
431 if (reps < (2 << *max_run_length_prefix)) {
432 int run_length_prefix = Log2Floor(reps);
433 v_out->push_back(run_length_prefix);
434 extra_bits->push_back(reps - (1 << run_length_prefix));
435 break;
436 } else {
437 v_out->push_back(*max_run_length_prefix);
438 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
439 reps -= (2 << *max_run_length_prefix) - 1;
440 }
441 }
442 }
443 }
444}
445
446// Returns a maximum zero-run-length-prefix value such that run-length coding
447// zeros in v with this maximum prefix value and then encoding the resulting
448// histogram and entropy-coding v produces the least amount of bits.
449int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
450 int min_cost = std::numeric_limits<int>::max();
451 int best_max_prefix = 0;
452 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
453 std::vector<int> rle_symbols;
454 std::vector<int> extra_bits;
455 int max_run_length_prefix = max_prefix;
456 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
457 if (max_run_length_prefix < max_prefix) break;
458 HistogramLiteral histogram;
459 for (int i = 0; i < rle_symbols.size(); ++i) {
460 histogram.Add(rle_symbols[i]);
461 }
462 int bit_cost = PopulationCost(histogram);
463 if (max_prefix > 0) {
464 bit_cost += 4;
465 }
466 for (int i = 1; i <= max_prefix; ++i) {
467 bit_cost += histogram.data_[i] * i; // extra bits
468 }
469 if (bit_cost < min_cost) {
470 min_cost = bit_cost;
471 best_max_prefix = max_prefix;
472 }
473 }
474 return best_max_prefix;
475}
476
477void EncodeContextMap(const std::vector<int>& context_map,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200478 int num_clusters,
479 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100480 EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200481
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800482 if (num_clusters == 1) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200483 return;
484 }
485
486 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
487 std::vector<int> rle_symbols;
488 std::vector<int> extra_bits;
489 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
490 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
491 &rle_symbols, &extra_bits);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100492 HistogramContextMap symbol_histogram;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200493 for (int i = 0; i < rle_symbols.size(); ++i) {
494 symbol_histogram.Add(rle_symbols[i]);
495 }
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100496 EntropyCodeContextMap symbol_code;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200497 BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
498 &symbol_code);
499 bool use_rle = max_run_length_prefix > 0;
500 WriteBits(1, use_rle, storage_ix, storage);
501 if (use_rle) {
502 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
503 }
504 StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
505 storage_ix, storage);
506 for (int i = 0; i < rle_symbols.size(); ++i) {
507 EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
508 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
509 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
510 }
511 }
512 WriteBits(1, 1, storage_ix, storage); // use move-to-front
513}
514
515template<int kSize>
516void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
517 int alphabet_size,
518 std::vector<EntropyCode<kSize> >* entropy_codes) {
519 entropy_codes->resize(histograms.size());
520 for (int i = 0; i < histograms.size(); ++i) {
521 BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
522 }
523}
524
525struct BlockSplitCode {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100526 EntropyCodeBlockType block_type_code;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200527 EntropyCodeBlockLength block_len_code;
528};
529
530void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
531 int length,
532 int* storage_ix, uint8_t* storage) {
533 int len_code = BlockLengthPrefix(length);
534 int extra_bits = BlockLengthExtraBits(len_code);
535 int extra_bits_value = length - BlockLengthOffset(len_code);
536 EntropyEncode(len_code, entropy, storage_ix, storage);
537
538 if (extra_bits > 0) {
539 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
540 }
541}
542
543void ComputeBlockTypeShortCodes(BlockSplit* split) {
544 if (split->num_types_ <= 1) {
545 split->num_types_ = 1;
546 return;
547 }
548 int ringbuffer[2] = { 0, 1 };
549 size_t index = 0;
550 for (int i = 0; i < split->types_.size(); ++i) {
551 int type = split->types_[i];
552 int type_code;
553 if (type == ringbuffer[index & 1]) {
554 type_code = 0;
555 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
556 type_code = 1;
557 } else {
558 type_code = type + 2;
559 }
560 ringbuffer[index & 1] = type;
561 ++index;
562 split->type_codes_.push_back(type_code);
563 }
564}
565
566void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
567 BlockSplitCode* code,
568 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100569 EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
570 if (split.num_types_ == 1) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200571 return;
572 }
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100573
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100574 HistogramBlockType type_histo;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200575 for (int i = 0; i < split.type_codes_.size(); ++i) {
576 type_histo.Add(split.type_codes_[i]);
577 }
578 BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
579 &code->block_type_code);
580 HistogramBlockLength length_histo;
581 for (int i = 0; i < split.lengths_.size(); ++i) {
582 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
583 }
584 BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
585 &code->block_len_code);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200586 StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
587 storage_ix, storage);
588 StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
589 storage_ix, storage);
590 EncodeBlockLength(code->block_len_code, split.lengths_[0],
591 storage_ix, storage);
592}
593
594void MoveAndEncode(const BlockSplitCode& code,
595 BlockSplitIterator* it,
596 int* storage_ix, uint8_t* storage) {
597 if (it->length_ == 0) {
598 ++it->idx_;
599 it->type_ = it->split_.types_[it->idx_];
600 it->length_ = it->split_.lengths_[it->idx_];
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100601 int type_code = it->split_.type_codes_[it->idx_];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200602 EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
603 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
604 }
605 --it->length_;
606}
607
608struct EncodingParams {
609 int num_direct_distance_codes;
610 int distance_postfix_bits;
611 int literal_context_mode;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200612};
613
614struct MetaBlock {
615 std::vector<Command> cmds;
616 EncodingParams params;
617 BlockSplit literal_split;
618 BlockSplit command_split;
619 BlockSplit distance_split;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100620 std::vector<int> literal_context_modes;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200621 std::vector<int> literal_context_map;
622 std::vector<int> distance_context_map;
623 std::vector<HistogramLiteral> literal_histograms;
624 std::vector<HistogramCommand> command_histograms;
625 std::vector<HistogramDistance> distance_histograms;
626};
627
628void BuildMetaBlock(const EncodingParams& params,
629 const std::vector<Command>& cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100630 const uint8_t* ringbuffer,
631 const size_t pos,
632 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200633 MetaBlock* mb) {
634 mb->cmds = cmds;
635 mb->params = params;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100636 if (cmds.empty()) {
637 return;
638 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200639 ComputeCommandPrefixes(&mb->cmds,
640 mb->params.num_direct_distance_codes,
641 mb->params.distance_postfix_bits);
642 SplitBlock(mb->cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100643 &ringbuffer[pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200644 &mb->literal_split,
645 &mb->command_split,
646 &mb->distance_split);
647 ComputeBlockTypeShortCodes(&mb->literal_split);
648 ComputeBlockTypeShortCodes(&mb->command_split);
649 ComputeBlockTypeShortCodes(&mb->distance_split);
650
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100651 mb->literal_context_modes.resize(mb->literal_split.num_types_,
652 mb->params.literal_context_mode);
653
654
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200655 int num_literal_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100656 mb->literal_split.num_types_ << kLiteralContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200657 int num_distance_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100658 mb->distance_split.num_types_ << kDistanceContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200659 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
660 mb->command_histograms.resize(mb->command_split.num_types_);
661 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
662 BuildHistograms(mb->cmds,
663 mb->literal_split,
664 mb->command_split,
665 mb->distance_split,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100666 ringbuffer,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200667 pos,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100668 mask,
669 mb->literal_context_modes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200670 &literal_histograms,
671 &mb->command_histograms,
672 &distance_histograms);
673
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100674 // Histogram ids need to fit in one byte.
675 static const int kMaxNumberOfHistograms = 256;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200676
677 mb->literal_histograms = literal_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100678 ClusterHistograms(literal_histograms,
679 1 << kLiteralContextBits,
680 mb->literal_split.num_types_,
681 kMaxNumberOfHistograms,
682 &mb->literal_histograms,
683 &mb->literal_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200684
685 mb->distance_histograms = distance_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100686 ClusterHistograms(distance_histograms,
687 1 << kDistanceContextBits,
688 mb->distance_split.num_types_,
689 kMaxNumberOfHistograms,
690 &mb->distance_histograms,
691 &mb->distance_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200692}
693
694size_t MetaBlockLength(const std::vector<Command>& cmds) {
695 size_t length = 0;
696 for (int i = 0; i < cmds.size(); ++i) {
697 const Command& cmd = cmds[i];
698 length += cmd.insert_length_ + cmd.copy_length_;
699 }
700 return length;
701}
702
703void StoreMetaBlock(const MetaBlock& mb,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100704 const bool is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100705 const uint8_t* ringbuffer,
706 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200707 size_t* pos,
708 int* storage_ix, uint8_t* storage) {
709 size_t length = MetaBlockLength(mb.cmds);
710 const size_t end_pos = *pos + length;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100711 EncodeMetaBlockLength(length,
712 is_last,
713 false,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200714 storage_ix, storage);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100715 if (length == 0) {
716 return;
717 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200718 BlockSplitCode literal_split_code;
719 BlockSplitCode command_split_code;
720 BlockSplitCode distance_split_code;
721 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
722 storage_ix, storage);
723 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
724 storage_ix, storage);
725 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
726 storage_ix, storage);
727 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
728 WriteBits(4,
729 mb.params.num_direct_distance_codes >>
730 mb.params.distance_postfix_bits, storage_ix, storage);
731 int num_distance_codes =
732 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
733 (48 << mb.params.distance_postfix_bits);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100734 for (int i = 0; i < mb.literal_split.num_types_; ++i) {
735 WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
736 }
737 EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
738 EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200739 std::vector<EntropyCodeLiteral> literal_codes;
740 std::vector<EntropyCodeCommand> command_codes;
741 std::vector<EntropyCodeDistance> distance_codes;
742 BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
743 BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
744 &command_codes);
745 BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
746 &distance_codes);
747 StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
748 StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
749 StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
750 BlockSplitIterator literal_it(mb.literal_split);
751 BlockSplitIterator command_it(mb.command_split);
752 BlockSplitIterator distance_it(mb.distance_split);
753 for (int i = 0; i < mb.cmds.size(); ++i) {
754 const Command& cmd = mb.cmds[i];
755 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
756 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
757 for (int j = 0; j < cmd.insert_length_; ++j) {
758 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
759 int histogram_idx = literal_it.type_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100760 uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
761 uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
762 int context = ((literal_it.type_ << kLiteralContextBits) +
763 Context(prev_byte, prev_byte2,
764 mb.literal_context_modes[literal_it.type_]));
765 histogram_idx = mb.literal_context_map[context];
766 EntropyEncode(ringbuffer[*pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200767 literal_codes[histogram_idx], storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100768 ++(*pos);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200769 }
770 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
771 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100772 int context = (distance_it.type_ << 2) +
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800773 ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
774 int histogram_index = mb.distance_context_map[context];
775 size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200776 EncodeCopyDistance(cmd, distance_codes[histogram_index],
777 storage_ix, storage);
778 }
779 *pos += cmd.copy_length_;
780 }
781}
782
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100783BrotliCompressor::BrotliCompressor()
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800784 : window_bits_(kWindowBits),
785 hasher_(new Hasher),
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100786 dist_ringbuffer_idx_(0),
787 input_pos_(0),
788 ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
789 literal_cost_(1 << kRingBufferBits),
790 storage_ix_(0),
791 storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100792 dist_ringbuffer_[0] = 16;
793 dist_ringbuffer_[1] = 15;
794 dist_ringbuffer_[2] = 11;
795 dist_ringbuffer_[3] = 4;
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800796 storage_[0] = 0;
797}
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100798
799BrotliCompressor::~BrotliCompressor() {
800 delete hasher_;
801 delete[] storage_;
802}
803
804void BrotliCompressor::WriteStreamHeader() {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100805 // Encode window size.
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800806 if (window_bits_ == 16) {
807 WriteBits(1, 0, &storage_ix_, storage_);
808 } else {
809 WriteBits(1, 1, &storage_ix_, storage_);
810 WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
811 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100812}
813
814void BrotliCompressor::WriteMetaBlock(const size_t input_size,
815 const uint8_t* input_buffer,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100816 const bool is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100817 size_t* encoded_size,
818 uint8_t* encoded_buffer) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100819 std::vector<Command> commands;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100820 if (input_size > 0) {
821 ringbuffer_.Write(input_buffer, input_size);
822 EstimateBitCostsForLiterals(input_pos_, input_size,
823 kRingBufferMask, ringbuffer_.start(),
824 &literal_cost_[0]);
825 CreateBackwardReferences(input_size, input_pos_,
826 ringbuffer_.start(),
827 &literal_cost_[0],
828 kRingBufferMask, kMaxBackwardDistance,
829 hasher_,
830 &commands);
831 ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
832 &dist_ringbuffer_idx_);
833 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100834 EncodingParams params;
835 params.num_direct_distance_codes = 12;
836 params.distance_postfix_bits = 1;
837 params.literal_context_mode = CONTEXT_SIGNED;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100838 const int storage_ix0 = storage_ix_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100839 MetaBlock mb;
840 BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
841 kRingBufferMask, &mb);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100842 StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100843 &input_pos_, &storage_ix_, storage_);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100844 size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
845 if (input_size + 4 < output_size) {
846 storage_ix_ = storage_ix0;
847 storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
848 EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
849 size_t hdr_size = (storage_ix_ + 7) >> 3;
850 memcpy(encoded_buffer, storage_, hdr_size);
851 memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
852 *encoded_size = hdr_size + input_size;
853 if (is_last) {
854 encoded_buffer[*encoded_size] = 0x3; // ISLAST, ISEMPTY
855 ++(*encoded_size);
856 }
857 storage_ix_ = 0;
858 storage_[0] = 0;
859 } else {
860 memcpy(encoded_buffer, storage_, output_size);
861 *encoded_size = output_size;
862 if (is_last) {
863 storage_ix_ = 0;
864 storage_[0] = 0;
865 } else {
866 storage_ix_ -= output_size << 3;
867 storage_[storage_ix_ >> 3] = storage_[output_size];
868 }
869 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100870}
871
872void BrotliCompressor::FinishStream(
873 size_t* encoded_size, uint8_t* encoded_buffer) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100874 WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100875}
876
877
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200878int BrotliCompressBuffer(size_t input_size,
879 const uint8_t* input_buffer,
880 size_t* encoded_size,
881 uint8_t* encoded_buffer) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200882 if (input_size == 0) {
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100883 encoded_buffer[0] = 6;
884 *encoded_size = 1;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200885 return 1;
886 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200887
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100888 BrotliCompressor compressor;
889 compressor.WriteStreamHeader();
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200890
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100891 const int max_block_size = 1 << kMetaBlockSizeBits;
892 size_t max_output_size = *encoded_size;
893 const uint8_t* input_end = input_buffer + input_size;
894 *encoded_size = 0;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200895
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100896 while (input_buffer < input_end) {
897 int block_size = max_block_size;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100898 bool is_last = false;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100899 if (block_size >= input_end - input_buffer) {
900 block_size = input_end - input_buffer;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100901 is_last = true;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100902 }
903 size_t output_size = max_output_size;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100904 compressor.WriteMetaBlock(block_size, input_buffer, is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100905 &output_size, &encoded_buffer[*encoded_size]);
906 input_buffer += block_size;
907 *encoded_size += output_size;
908 max_output_size -= output_size;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200909 }
910
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200911 return 1;
912}
913
914} // namespace brotli