blob: c54f01d914d98a14750a712b9d742708a6edeffc [file] [log] [blame]
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +010027#include "./transform.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020028#include "./entropy_encode.h"
29#include "./fast_log.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010030#include "./hash.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020031#include "./histogram.h"
Zoltan Szabadka1571db32013-11-15 19:02:17 +010032#include "./literal_cost.h"
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020033#include "./prefix.h"
34#include "./write_bits.h"
35
36namespace brotli {
37
Roderick Sheeter437bbad2013-11-19 14:32:56 -080038static const int kWindowBits = 22;
39// To make decoding faster, we allow the decoder to write 16 bytes ahead in
40// its ringbuffer, therefore the encoder has to decrease max distance by this
41// amount.
42static const int kDecoderRingBufferWriteAheadSlack = 16;
43static const int kMaxBackwardDistance =
44 (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
45
46static const int kMetaBlockSizeBits = 21;
47static const int kRingBufferBits = 23;
48static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
49
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020050template<int kSize>
51double Entropy(const std::vector<Histogram<kSize> >& histograms) {
52 double retval = 0;
53 for (int i = 0; i < histograms.size(); ++i) {
54 retval += histograms[i].EntropyBitCost();
55 }
56 return retval;
57}
58
Zoltan Szabadka1571db32013-11-15 19:02:17 +010059template<int kSize>
60double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
61 double retval = 0;
62 for (int i = 0; i < histograms.size(); ++i) {
63 retval += PopulationCost(histograms[i]);
64 }
65 return retval;
66}
67
Zoltan Szabadkae7094912013-12-12 13:18:04 +010068void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
69 if (n == 0) {
70 WriteBits(1, 0, storage_ix, storage);
71 } else {
72 WriteBits(1, 1, storage_ix, storage);
73 int nbits = Log2Floor(n);
74 WriteBits(3, nbits, storage_ix, storage);
75 if (nbits > 0) {
76 WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
77 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +020078 }
79}
80
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +010081int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
82 // ASCII
83 if ((input[0] & 0x80) == 0) {
84 *symbol = input[0];
85 if (*symbol > 0) {
86 return 1;
87 }
88 }
89 // 2-byte UTF8
90 if (size > 1 &&
91 (input[0] & 0xe0) == 0xc0 &&
92 (input[1] & 0xc0) == 0x80) {
93 *symbol = (((input[0] & 0x1f) << 6) |
94 (input[1] & 0x3f));
95 if (*symbol > 0x7f) {
96 return 2;
97 }
98 }
99 // 3-byte UFT8
100 if (size > 2 &&
101 (input[0] & 0xf0) == 0xe0 &&
102 (input[1] & 0xc0) == 0x80 &&
103 (input[2] & 0xc0) == 0x80) {
104 *symbol = (((input[0] & 0x0f) << 12) |
105 ((input[1] & 0x3f) << 6) |
106 (input[2] & 0x3f));
107 if (*symbol > 0x7ff) {
108 return 3;
109 }
110 }
111 // 4-byte UFT8
112 if (size > 3 &&
113 (input[0] & 0xf8) == 0xf0 &&
114 (input[1] & 0xc0) == 0x80 &&
115 (input[2] & 0xc0) == 0x80 &&
116 (input[3] & 0xc0) == 0x80) {
117 *symbol = (((input[0] & 0x07) << 18) |
118 ((input[1] & 0x3f) << 12) |
119 ((input[2] & 0x3f) << 6) |
120 (input[3] & 0x3f));
121 if (*symbol > 0xffff && *symbol <= 0x10ffff) {
122 return 4;
123 }
124 }
125 // Not UTF8, emit a special symbol above the UTF8-code space
126 *symbol = 0x110000 | input[0];
127 return 1;
128}
129
130// Returns true if at least min_fraction of the data is UTF8-encoded.
131bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
132 size_t size_utf8 = 0;
133 size_t pos = 0;
134 while (pos < length) {
135 int symbol;
136 int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
137 pos += bytes_read;
138 if (symbol < 0x110000) size_utf8 += bytes_read;
139 }
140 return size_utf8 > min_fraction * length;
141}
142
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100143void EncodeMetaBlockLength(size_t meta_block_size,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100144 bool is_last,
145 bool is_uncompressed,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200146 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100147 WriteBits(1, is_last, storage_ix, storage);
148 if (is_last) {
149 if (meta_block_size == 0) {
150 WriteBits(1, 1, storage_ix, storage);
151 return;
152 }
153 WriteBits(1, 0, storage_ix, storage);
154 }
155 --meta_block_size;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100156 int num_bits = Log2Floor(meta_block_size) + 1;
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100157 if (num_bits < 16) {
158 num_bits = 16;
159 }
160 WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100161 while (num_bits > 0) {
162 WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
163 meta_block_size >>= 4;
164 num_bits -= 4;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200165 }
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100166 if (!is_last) {
167 WriteBits(1, is_uncompressed, storage_ix, storage);
168 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200169}
170
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200171void StoreHuffmanTreeOfHuffmanTreeToBitMask(
172 const uint8_t* code_length_bitdepth,
173 int* storage_ix, uint8_t* storage) {
174 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100175 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200176 };
177 // Throw away trailing zeros:
178 int codes_to_store = kCodeLengthCodes;
Zoltan Szabadkab89f3be2013-12-17 17:17:57 +0100179 for (; codes_to_store > 0; --codes_to_store) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200180 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
181 break;
182 }
183 }
Zoltan Szabadkab89f3be2013-12-17 17:17:57 +0100184 int num_codes = 0;
185 for (int i = 0; i < codes_to_store; ++i) {
186 if (code_length_bitdepth[kStorageOrder[i]] != 0) {
187 ++num_codes;
188 }
189 }
190 if (num_codes == 1) {
191 codes_to_store = kCodeLengthCodes;
192 }
Zoltan Szabadka4c8c7fd2014-01-08 12:28:28 +0100193 int skip_some = 0; // skips none.
194 if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
195 code_length_bitdepth[kStorageOrder[1]] == 0) {
196 skip_some = 2; // skips two.
197 if (code_length_bitdepth[kStorageOrder[2]] == 0) {
198 skip_some = 3; // skips three.
199 }
200 }
201 WriteBits(2, skip_some, storage_ix, storage);
202 for (int i = skip_some; i < codes_to_store; ++i) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100203 uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100204 uint8_t bits[] = { 0, 7, 3, 2, 1, 15 };
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100205 int v = code_length_bitdepth[kStorageOrder[i]];
206 WriteBits(len[v], bits[v], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200207 }
208}
209
210void StoreHuffmanTreeToBitMask(
211 const uint8_t* huffman_tree,
212 const uint8_t* huffman_tree_extra_bits,
213 const int huffman_tree_size,
214 const EntropyCode<kCodeLengthCodes>& entropy,
215 int* storage_ix, uint8_t* storage) {
216 for (int i = 0; i < huffman_tree_size; ++i) {
217 const int ix = huffman_tree[i];
218 const int extra_bits = huffman_tree_extra_bits[i];
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100219 if (entropy.count_ > 1) {
220 WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
221 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200222 switch (ix) {
223 case 16:
224 WriteBits(2, extra_bits, storage_ix, storage);
225 break;
226 case 17:
227 WriteBits(3, extra_bits, storage_ix, storage);
228 break;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200229 }
230 }
231}
232
233template<int kSize>
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100234void StoreHuffmanCodeSimple(
235 const EntropyCode<kSize>& code, int alphabet_size,
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100236 int max_bits, int* storage_ix, uint8_t* storage) {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100237 const uint8_t *depth = &code.depth_[0];
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100238 int symbols[4];
239 // Quadratic sort.
240 int k, j;
241 for (k = 0; k < code.count_; ++k) {
242 symbols[k] = code.symbols_[k];
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100243 }
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100244 for (k = 0; k < code.count_; ++k) {
245 for (j = k + 1; j < code.count_; ++j) {
246 if (depth[symbols[j]] < depth[symbols[k]]) {
247 int t = symbols[k];
248 symbols[k] = symbols[j];
249 symbols[j] = t;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100250 }
251 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200252 }
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100253 // Small tree marker to encode 1-4 symbols.
254 WriteBits(2, 1, storage_ix, storage);
255 WriteBits(2, code.count_ - 1, storage_ix, storage);
256 for (int i = 0; i < code.count_; ++i) {
257 WriteBits(max_bits, symbols[i], storage_ix, storage);
258 }
259 if (code.count_ == 4) {
260 if (depth[symbols[0]] == 2 &&
261 depth[symbols[1]] == 2 &&
262 depth[symbols[2]] == 2 &&
263 depth[symbols[3]] == 2) {
264 WriteBits(1, 0, storage_ix, storage);
265 } else {
266 WriteBits(1, 1, storage_ix, storage);
267 }
268 }
269}
270
271template<int kSize>
272void StoreHuffmanCodeComplex(
273 const EntropyCode<kSize>& code, int alphabet_size,
274 int* storage_ix, uint8_t* storage) {
275 const uint8_t *depth = &code.depth_[0];
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200276 uint8_t huffman_tree[kSize];
277 uint8_t huffman_tree_extra_bits[kSize];
278 int huffman_tree_size = 0;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100279 WriteHuffmanTree(depth,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200280 alphabet_size,
281 &huffman_tree[0],
282 &huffman_tree_extra_bits[0],
283 &huffman_tree_size);
284 Histogram<kCodeLengthCodes> huffman_tree_histogram;
285 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
286 for (int i = 0; i < huffman_tree_size; ++i) {
287 huffman_tree_histogram.Add(huffman_tree[i]);
288 }
289 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100290 BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200291 &huffman_tree_entropy);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200292 StoreHuffmanTreeOfHuffmanTreeToBitMask(
293 &huffman_tree_entropy.depth_[0], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200294 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
295 huffman_tree_size, huffman_tree_entropy,
296 storage_ix, storage);
297}
298
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100299template<int kSize>
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100300void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
301 const int tree_limit,
302 const int alphabet_size,
303 EntropyCode<kSize>* code,
304 int* storage_ix, uint8_t* storage) {
305 memset(code->depth_, 0, sizeof(code->depth_));
306 memset(code->bits_, 0, sizeof(code->bits_));
307 memset(code->symbols_, 0, sizeof(code->symbols_));
308 code->count_ = 0;
309
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100310 int max_bits_counter = alphabet_size - 1;
311 int max_bits = 0;
312 while (max_bits_counter) {
313 max_bits_counter >>= 1;
314 ++max_bits;
315 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100316
317 for (size_t i = 0; i < alphabet_size; i++) {
318 if (histogram.data_[i] > 0) {
319 if (code->count_ < 4) code->symbols_[code->count_] = i;
320 ++code->count_;
321 }
322 }
323
324 if (code->count_ <= 1) {
325 WriteBits(2, 1, storage_ix, storage);
326 WriteBits(2, 0, storage_ix, storage);
327 WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
328 return;
329 }
330
331 if (alphabet_size >= 50 && code->count_ >= 16) {
332 std::vector<int> counts(alphabet_size);
333 memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
334 OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
335 CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100336 } else {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100337 CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
338 }
339 ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
340
341 if (code->count_ <= 4) {
342 StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
343 } else {
344 StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100345 }
346}
347
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200348template<int kSize>
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100349void BuildAndStoreEntropyCodes(
350 const std::vector<Histogram<kSize> >& histograms,
351 int alphabet_size,
352 std::vector<EntropyCode<kSize> >* entropy_codes,
353 int* storage_ix, uint8_t* storage) {
354 entropy_codes->resize(histograms.size());
355 for (int i = 0; i < histograms.size(); ++i) {
356 BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
357 &(*entropy_codes)[i],
358 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200359 }
360}
361
362void EncodeCommand(const Command& cmd,
363 const EntropyCodeCommand& entropy,
364 int* storage_ix, uint8_t* storage) {
365 int code = cmd.command_prefix_;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100366 WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200367 if (code >= 128) {
368 code -= 128;
369 }
370 int insert_extra_bits = InsertLengthExtraBits(code);
371 uint64_t insert_extra_bits_val =
372 cmd.insert_length_ - InsertLengthOffset(code);
373 int copy_extra_bits = CopyLengthExtraBits(code);
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800374 uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200375 if (insert_extra_bits > 0) {
376 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
377 }
378 if (copy_extra_bits > 0) {
379 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
380 }
381}
382
383void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
384 int* storage_ix, uint8_t* storage) {
385 int code = cmd.distance_prefix_;
386 int extra_bits = cmd.distance_extra_bits_;
387 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100388 WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200389 if (extra_bits > 0) {
390 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
391 }
392}
393
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100394void ComputeDistanceShortCodes(std::vector<Command>* cmds,
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100395 size_t pos,
396 const size_t max_backward,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100397 int* dist_ringbuffer,
398 size_t* ringbuffer_idx) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200399 static const int kIndexOffset[16] = {
400 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
401 };
402 static const int kValueOffset[16] = {
403 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
404 };
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200405 for (int i = 0; i < cmds->size(); ++i) {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100406 pos += (*cmds)[i].insert_length_;
407 size_t max_distance = std::min(pos, max_backward);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200408 int cur_dist = (*cmds)[i].copy_distance_;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200409 int dist_code = cur_dist + 16;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100410 if (cur_dist <= max_distance) {
411 if (cur_dist == 0) break;
412 int limits[16] = { 0, 0, 0, 0,
413 6, 6, 11, 11,
414 11, 11, 11, 11,
415 12, 12, 12, 12 };
416 for (int k = 0; k < 16; ++k) {
417 // Only accept more popular choices.
418 if (cur_dist < limits[k]) {
419 // Typically unpopular ranges, don't replace a short distance
420 // with them.
421 continue;
422 }
423 int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
424 kValueOffset[k]);
425 if (cur_dist == comp) {
426 dist_code = k + 1;
427 break;
428 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200429 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100430 if (dist_code > 1) {
431 dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
432 ++(*ringbuffer_idx);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200433 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100434 pos += (*cmds)[i].copy_length_;
435 } else {
436 int word_idx = cur_dist - max_distance - 1;
437 const std::string word =
438 GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
439 pos += word.size();
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200440 }
441 (*cmds)[i].distance_code_ = dist_code;
442 }
443}
444
445void ComputeCommandPrefixes(std::vector<Command>* cmds,
446 int num_direct_distance_codes,
447 int distance_postfix_bits) {
448 for (int i = 0; i < cmds->size(); ++i) {
449 Command* cmd = &(*cmds)[i];
450 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800451 cmd->copy_length_code_);
452 if (cmd->copy_length_code_ > 0) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200453 PrefixEncodeCopyDistance(cmd->distance_code_,
454 num_direct_distance_codes,
455 distance_postfix_bits,
456 &cmd->distance_prefix_,
457 &cmd->distance_extra_bits_,
458 &cmd->distance_extra_bits_value_);
459 }
460 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
461 cmd->distance_prefix_ = 0xffff;
462 } else {
463 cmd->command_prefix_ += 128;
464 }
465 }
466}
467
468int IndexOf(const std::vector<int>& v, int value) {
469 for (int i = 0; i < v.size(); ++i) {
470 if (v[i] == value) return i;
471 }
472 return -1;
473}
474
475void MoveToFront(std::vector<int>* v, int index) {
476 int value = (*v)[index];
477 for (int i = index; i > 0; --i) {
478 (*v)[i] = (*v)[i - 1];
479 }
480 (*v)[0] = value;
481}
482
483std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
484 if (v.empty()) return v;
485 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
486 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
487 std::vector<int> result(v.size());
488 for (int i = 0; i < v.size(); ++i) {
489 int index = IndexOf(mtf, v[i]);
490 result[i] = index;
491 MoveToFront(&mtf, index);
492 }
493 return result;
494}
495
496// Finds runs of zeros in v_in and replaces them with a prefix code of the run
497// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
498// shifted by *max_length_prefix. Will not create prefix codes bigger than the
499// initial value of *max_run_length_prefix. The prefix code of run length L is
500// simply Log2Floor(L) and the number of extra bits is the same as the prefix
501// code.
502void RunLengthCodeZeros(const std::vector<int>& v_in,
503 int* max_run_length_prefix,
504 std::vector<int>* v_out,
505 std::vector<int>* extra_bits) {
506 int max_reps = 0;
507 for (int i = 0; i < v_in.size();) {
508 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
509 int reps = 0;
510 for (; i < v_in.size() && v_in[i] == 0; ++i) {
511 ++reps;
512 }
513 max_reps = std::max(reps, max_reps);
514 }
515 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
516 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
517 for (int i = 0; i < v_in.size();) {
518 if (v_in[i] != 0) {
519 v_out->push_back(v_in[i] + *max_run_length_prefix);
520 extra_bits->push_back(0);
521 ++i;
522 } else {
523 int reps = 1;
524 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
525 ++reps;
526 }
527 i += reps;
528 while (reps) {
529 if (reps < (2 << *max_run_length_prefix)) {
530 int run_length_prefix = Log2Floor(reps);
531 v_out->push_back(run_length_prefix);
532 extra_bits->push_back(reps - (1 << run_length_prefix));
533 break;
534 } else {
535 v_out->push_back(*max_run_length_prefix);
536 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
537 reps -= (2 << *max_run_length_prefix) - 1;
538 }
539 }
540 }
541 }
542}
543
544// Returns a maximum zero-run-length-prefix value such that run-length coding
545// zeros in v with this maximum prefix value and then encoding the resulting
546// histogram and entropy-coding v produces the least amount of bits.
547int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
548 int min_cost = std::numeric_limits<int>::max();
549 int best_max_prefix = 0;
550 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
551 std::vector<int> rle_symbols;
552 std::vector<int> extra_bits;
553 int max_run_length_prefix = max_prefix;
554 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
555 if (max_run_length_prefix < max_prefix) break;
556 HistogramLiteral histogram;
557 for (int i = 0; i < rle_symbols.size(); ++i) {
558 histogram.Add(rle_symbols[i]);
559 }
560 int bit_cost = PopulationCost(histogram);
561 if (max_prefix > 0) {
562 bit_cost += 4;
563 }
564 for (int i = 1; i <= max_prefix; ++i) {
565 bit_cost += histogram.data_[i] * i; // extra bits
566 }
567 if (bit_cost < min_cost) {
568 min_cost = bit_cost;
569 best_max_prefix = max_prefix;
570 }
571 }
572 return best_max_prefix;
573}
574
575void EncodeContextMap(const std::vector<int>& context_map,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200576 int num_clusters,
577 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100578 EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200579
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800580 if (num_clusters == 1) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200581 return;
582 }
583
584 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
585 std::vector<int> rle_symbols;
586 std::vector<int> extra_bits;
587 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
588 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
589 &rle_symbols, &extra_bits);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100590 HistogramContextMap symbol_histogram;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200591 for (int i = 0; i < rle_symbols.size(); ++i) {
592 symbol_histogram.Add(rle_symbols[i]);
593 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200594 bool use_rle = max_run_length_prefix > 0;
595 WriteBits(1, use_rle, storage_ix, storage);
596 if (use_rle) {
597 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
598 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100599 EntropyCodeContextMap symbol_code;
600 BuildAndStoreEntropyCode(symbol_histogram, 15,
601 num_clusters + max_run_length_prefix,
602 &symbol_code,
603 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200604 for (int i = 0; i < rle_symbols.size(); ++i) {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100605 WriteBits(symbol_code.depth_[rle_symbols[i]],
606 symbol_code.bits_[rle_symbols[i]],
607 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200608 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
609 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
610 }
611 }
612 WriteBits(1, 1, storage_ix, storage); // use move-to-front
613}
614
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200615struct BlockSplitCode {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100616 EntropyCodeBlockType block_type_code;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200617 EntropyCodeBlockLength block_len_code;
618};
619
620void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
621 int length,
622 int* storage_ix, uint8_t* storage) {
623 int len_code = BlockLengthPrefix(length);
624 int extra_bits = BlockLengthExtraBits(len_code);
625 int extra_bits_value = length - BlockLengthOffset(len_code);
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100626 WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
627 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200628 if (extra_bits > 0) {
629 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
630 }
631}
632
633void ComputeBlockTypeShortCodes(BlockSplit* split) {
634 if (split->num_types_ <= 1) {
635 split->num_types_ = 1;
636 return;
637 }
638 int ringbuffer[2] = { 0, 1 };
639 size_t index = 0;
640 for (int i = 0; i < split->types_.size(); ++i) {
641 int type = split->types_[i];
642 int type_code;
643 if (type == ringbuffer[index & 1]) {
644 type_code = 0;
645 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
646 type_code = 1;
647 } else {
648 type_code = type + 2;
649 }
650 ringbuffer[index & 1] = type;
651 ++index;
652 split->type_codes_.push_back(type_code);
653 }
654}
655
656void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
657 BlockSplitCode* code,
658 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100659 EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100660
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100661 if (split.num_types_ == 1) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200662 return;
663 }
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100664
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100665 HistogramBlockType type_histo;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100666 for (int i = 1; i < split.type_codes_.size(); ++i) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200667 type_histo.Add(split.type_codes_[i]);
668 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200669 HistogramBlockLength length_histo;
670 for (int i = 0; i < split.lengths_.size(); ++i) {
671 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
672 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100673 BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
674 &code->block_type_code,
675 storage_ix, storage);
676 BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
677 &code->block_len_code,
678 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200679 EncodeBlockLength(code->block_len_code, split.lengths_[0],
680 storage_ix, storage);
681}
682
683void MoveAndEncode(const BlockSplitCode& code,
684 BlockSplitIterator* it,
685 int* storage_ix, uint8_t* storage) {
686 if (it->length_ == 0) {
687 ++it->idx_;
688 it->type_ = it->split_.types_[it->idx_];
689 it->length_ = it->split_.lengths_[it->idx_];
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100690 int type_code = it->split_.type_codes_[it->idx_];
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100691 WriteBits(code.block_type_code.depth_[type_code],
692 code.block_type_code.bits_[type_code],
693 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200694 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
695 }
696 --it->length_;
697}
698
699struct EncodingParams {
700 int num_direct_distance_codes;
701 int distance_postfix_bits;
702 int literal_context_mode;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200703};
704
705struct MetaBlock {
706 std::vector<Command> cmds;
707 EncodingParams params;
708 BlockSplit literal_split;
709 BlockSplit command_split;
710 BlockSplit distance_split;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100711 std::vector<int> literal_context_modes;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200712 std::vector<int> literal_context_map;
713 std::vector<int> distance_context_map;
714 std::vector<HistogramLiteral> literal_histograms;
715 std::vector<HistogramCommand> command_histograms;
716 std::vector<HistogramDistance> distance_histograms;
717};
718
719void BuildMetaBlock(const EncodingParams& params,
720 const std::vector<Command>& cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100721 const uint8_t* ringbuffer,
722 const size_t pos,
723 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200724 MetaBlock* mb) {
725 mb->cmds = cmds;
726 mb->params = params;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100727 if (cmds.empty()) {
728 return;
729 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200730 ComputeCommandPrefixes(&mb->cmds,
731 mb->params.num_direct_distance_codes,
732 mb->params.distance_postfix_bits);
733 SplitBlock(mb->cmds,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100734 &ringbuffer[pos & mask],
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200735 &mb->literal_split,
736 &mb->command_split,
737 &mb->distance_split);
738 ComputeBlockTypeShortCodes(&mb->literal_split);
739 ComputeBlockTypeShortCodes(&mb->command_split);
740 ComputeBlockTypeShortCodes(&mb->distance_split);
741
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100742 mb->literal_context_modes.resize(mb->literal_split.num_types_,
743 mb->params.literal_context_mode);
744
745
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200746 int num_literal_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100747 mb->literal_split.num_types_ << kLiteralContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200748 int num_distance_contexts =
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100749 mb->distance_split.num_types_ << kDistanceContextBits;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200750 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
751 mb->command_histograms.resize(mb->command_split.num_types_);
752 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
753 BuildHistograms(mb->cmds,
754 mb->literal_split,
755 mb->command_split,
756 mb->distance_split,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100757 ringbuffer,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200758 pos,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100759 mask,
760 mb->literal_context_modes,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200761 &literal_histograms,
762 &mb->command_histograms,
763 &distance_histograms);
764
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100765 // Histogram ids need to fit in one byte.
766 static const int kMaxNumberOfHistograms = 256;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200767
768 mb->literal_histograms = literal_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100769 ClusterHistograms(literal_histograms,
770 1 << kLiteralContextBits,
771 mb->literal_split.num_types_,
772 kMaxNumberOfHistograms,
773 &mb->literal_histograms,
774 &mb->literal_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200775
776 mb->distance_histograms = distance_histograms;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100777 ClusterHistograms(distance_histograms,
778 1 << kDistanceContextBits,
779 mb->distance_split.num_types_,
780 kMaxNumberOfHistograms,
781 &mb->distance_histograms,
782 &mb->distance_context_map);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200783}
784
785size_t MetaBlockLength(const std::vector<Command>& cmds) {
786 size_t length = 0;
787 for (int i = 0; i < cmds.size(); ++i) {
788 const Command& cmd = cmds[i];
789 length += cmd.insert_length_ + cmd.copy_length_;
790 }
791 return length;
792}
793
794void StoreMetaBlock(const MetaBlock& mb,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100795 const bool is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100796 const uint8_t* ringbuffer,
797 const size_t mask,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200798 size_t* pos,
799 int* storage_ix, uint8_t* storage) {
800 size_t length = MetaBlockLength(mb.cmds);
801 const size_t end_pos = *pos + length;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100802 EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
803
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100804 if (length == 0) {
805 return;
806 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200807 BlockSplitCode literal_split_code;
808 BlockSplitCode command_split_code;
809 BlockSplitCode distance_split_code;
810 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
811 storage_ix, storage);
812 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
813 storage_ix, storage);
814 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
815 storage_ix, storage);
816 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
817 WriteBits(4,
818 mb.params.num_direct_distance_codes >>
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100819 mb.params.distance_postfix_bits,
820 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200821 int num_distance_codes =
822 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
823 (48 << mb.params.distance_postfix_bits);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100824 for (int i = 0; i < mb.literal_split.num_types_; ++i) {
825 WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
826 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100827 EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
828 storage_ix, storage);
829 EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
830 storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200831 std::vector<EntropyCodeLiteral> literal_codes;
832 std::vector<EntropyCodeCommand> command_codes;
833 std::vector<EntropyCodeDistance> distance_codes;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100834 BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
835 storage_ix, storage);
836 BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
837 &command_codes, storage_ix, storage);
838 BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
839 &distance_codes, storage_ix, storage);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200840 BlockSplitIterator literal_it(mb.literal_split);
841 BlockSplitIterator command_it(mb.command_split);
842 BlockSplitIterator distance_it(mb.distance_split);
843 for (int i = 0; i < mb.cmds.size(); ++i) {
844 const Command& cmd = mb.cmds[i];
845 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
846 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
847 for (int j = 0; j < cmd.insert_length_; ++j) {
848 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
849 int histogram_idx = literal_it.type_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100850 uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
851 uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
852 int context = ((literal_it.type_ << kLiteralContextBits) +
853 Context(prev_byte, prev_byte2,
854 mb.literal_context_modes[literal_it.type_]));
855 histogram_idx = mb.literal_context_map[context];
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100856 int literal = ringbuffer[*pos & mask];
857 WriteBits(literal_codes[histogram_idx].depth_[literal],
858 literal_codes[histogram_idx].bits_[literal],
859 storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100860 ++(*pos);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200861 }
862 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
863 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100864 int context = (distance_it.type_ << 2) +
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800865 ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
866 int histogram_index = mb.distance_context_map[context];
867 size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
Zoltan Szabadka79e99af2013-10-23 13:06:13 +0200868 EncodeCopyDistance(cmd, distance_codes[histogram_index],
869 storage_ix, storage);
870 }
871 *pos += cmd.copy_length_;
872 }
873}
874
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100875BrotliCompressor::BrotliCompressor(BrotliParams params)
876 : params_(params),
877 window_bits_(kWindowBits),
878 hashers_(new Hashers()),
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100879 dist_ringbuffer_idx_(0),
880 input_pos_(0),
881 ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
882 literal_cost_(1 << kRingBufferBits),
883 storage_ix_(0),
884 storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +0100885 dist_ringbuffer_[0] = 16;
886 dist_ringbuffer_[1] = 15;
887 dist_ringbuffer_[2] = 11;
888 dist_ringbuffer_[3] = 4;
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800889 storage_[0] = 0;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100890 switch (params.mode) {
891 case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
892 case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
893 default: break;
894 }
895 hashers_->Init(hash_type_);
896 if (params.mode == BrotliParams::MODE_TEXT) {
897 StoreDictionaryWordHashes();
898 }
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800899}
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100900
901BrotliCompressor::~BrotliCompressor() {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100902 delete[] storage_;
903}
904
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100905StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
906
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100907void BrotliCompressor::StoreDictionaryWordHashes() {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100908 const int num_transforms = kNumTransforms;
909 if (static_dictionary_ == NULL) {
910 static_dictionary_ = new StaticDictionary;
911 for (int t = num_transforms - 1; t >= 0; --t) {
Zoltan Szabadka0829e372014-03-25 16:48:25 +0100912 for (int i = kMaxDictionaryWordLength;
913 i >= kMinDictionaryWordLength; --i) {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100914 const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
915 for (int j = num_words - 1; j >= 0; --j) {
916 int word_id = t * num_words + j;
917 std::string word = GetTransformedDictionaryWord(i, word_id);
Zoltan Szabadka0829e372014-03-25 16:48:25 +0100918 if (word.size() >= 4) {
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100919 static_dictionary_->Insert(word, i, word_id);
920 }
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100921 }
922 }
923 }
924 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100925 hashers_->SetStaticDictionary(static_dictionary_);
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100926}
927
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100928void BrotliCompressor::WriteStreamHeader() {
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100929 // Encode window size.
Roderick Sheeter437bbad2013-11-19 14:32:56 -0800930 if (window_bits_ == 16) {
931 WriteBits(1, 0, &storage_ix_, storage_);
932 } else {
933 WriteBits(1, 1, &storage_ix_, storage_);
934 WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
935 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100936}
937
938void BrotliCompressor::WriteMetaBlock(const size_t input_size,
939 const uint8_t* input_buffer,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100940 const bool is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100941 size_t* encoded_size,
942 uint8_t* encoded_buffer) {
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100943 static const double kMinUTF8Ratio = 0.75;
944 bool utf8_mode = false;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100945 std::vector<Command> commands;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100946 if (input_size > 0) {
947 ringbuffer_.Write(input_buffer, input_size);
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100948 utf8_mode = IsMostlyUTF8(
949 &ringbuffer_.start()[input_pos_ & kRingBufferMask],
950 input_size, kMinUTF8Ratio);
951 if (utf8_mode) {
952 EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100953 kRingBufferMask, kRingBufferMask,
954 ringbuffer_.start(), &literal_cost_[0]);
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100955 } else {
956 EstimateBitCostsForLiterals(input_pos_, input_size,
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100957 kRingBufferMask, kRingBufferMask,
958 ringbuffer_.start(), &literal_cost_[0]);
Zoltan Szabadkacbd5cb52014-02-14 15:04:23 +0100959 }
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100960 CreateBackwardReferences(
961 input_size, input_pos_,
962 ringbuffer_.start(),
963 &literal_cost_[0],
964 kRingBufferMask, kMaxBackwardDistance,
965 hashers_.get(),
966 hash_type_,
967 &commands);
968 ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
969 dist_ringbuffer_,
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100970 &dist_ringbuffer_idx_);
971 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100972 EncodingParams params;
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100973 params.num_direct_distance_codes =
974 params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
975 params.distance_postfix_bits =
976 params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100977 params.literal_context_mode = CONTEXT_SIGNED;
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100978 const int storage_ix0 = storage_ix_;
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100979 MetaBlock mb;
980 BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
981 kRingBufferMask, &mb);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100982 StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
Zoltan Szabadka1571db32013-11-15 19:02:17 +0100983 &input_pos_, &storage_ix_, storage_);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100984 size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
Zoltan Szabadka278b8942014-03-20 14:32:35 +0100985 output_size -= (storage_ix0 >> 3);
Zoltan Szabadkae7094912013-12-12 13:18:04 +0100986 if (input_size + 4 < output_size) {
987 storage_ix_ = storage_ix0;
988 storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
989 EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
990 size_t hdr_size = (storage_ix_ + 7) >> 3;
991 memcpy(encoded_buffer, storage_, hdr_size);
992 memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
993 *encoded_size = hdr_size + input_size;
994 if (is_last) {
995 encoded_buffer[*encoded_size] = 0x3; // ISLAST, ISEMPTY
996 ++(*encoded_size);
997 }
998 storage_ix_ = 0;
999 storage_[0] = 0;
1000 } else {
1001 memcpy(encoded_buffer, storage_, output_size);
1002 *encoded_size = output_size;
1003 if (is_last) {
1004 storage_ix_ = 0;
1005 storage_[0] = 0;
1006 } else {
1007 storage_ix_ -= output_size << 3;
1008 storage_[storage_ix_ >> 3] = storage_[output_size];
1009 }
1010 }
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001011}
1012
1013void BrotliCompressor::FinishStream(
1014 size_t* encoded_size, uint8_t* encoded_buffer) {
Zoltan Szabadkae7094912013-12-12 13:18:04 +01001015 WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001016}
1017
1018
Zoltan Szabadka278b8942014-03-20 14:32:35 +01001019int BrotliCompressBuffer(BrotliParams params,
1020 size_t input_size,
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001021 const uint8_t* input_buffer,
1022 size_t* encoded_size,
1023 uint8_t* encoded_buffer) {
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001024 if (input_size == 0) {
Zoltan Szabadkafe79fac2013-11-28 17:37:13 +01001025 encoded_buffer[0] = 6;
1026 *encoded_size = 1;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001027 return 1;
1028 }
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001029
Zoltan Szabadka278b8942014-03-20 14:32:35 +01001030 BrotliCompressor compressor(params);
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001031 compressor.WriteStreamHeader();
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001032
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001033 const int max_block_size = 1 << kMetaBlockSizeBits;
1034 size_t max_output_size = *encoded_size;
1035 const uint8_t* input_end = input_buffer + input_size;
1036 *encoded_size = 0;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001037
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001038 while (input_buffer < input_end) {
1039 int block_size = max_block_size;
Zoltan Szabadkae7094912013-12-12 13:18:04 +01001040 bool is_last = false;
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001041 if (block_size >= input_end - input_buffer) {
1042 block_size = input_end - input_buffer;
Zoltan Szabadkae7094912013-12-12 13:18:04 +01001043 is_last = true;
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001044 }
1045 size_t output_size = max_output_size;
Zoltan Szabadkae7094912013-12-12 13:18:04 +01001046 compressor.WriteMetaBlock(block_size, input_buffer, is_last,
Zoltan Szabadka1571db32013-11-15 19:02:17 +01001047 &output_size, &encoded_buffer[*encoded_size]);
1048 input_buffer += block_size;
1049 *encoded_size += output_size;
1050 max_output_size -= output_size;
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001051 }
1052
Zoltan Szabadka79e99af2013-10-23 13:06:13 +02001053 return 1;
1054}
1055
1056} // namespace brotli