blob: fa048346d82825a0b91ae4af10f8786321f7cc1a [file] [log] [blame]
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Implementation of Brotli compressor.
16
17#include "./encode.h"
18
19#include <algorithm>
20#include <limits>
21
22#include "./backward_references.h"
23#include "./bit_cost.h"
24#include "./block_splitter.h"
25#include "./cluster.h"
26#include "./context.h"
Zoltan Szabadka2f268ad2014-02-17 14:25:36 +010027#include "./transform.h"
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020028#include "./entropy_encode.h"
29#include "./fast_log.h"
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010030#include "./hash.h"
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020031#include "./histogram.h"
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010032#include "./literal_cost.h"
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020033#include "./prefix.h"
34#include "./write_bits.h"
35
36namespace brotli {
37
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -080038static const int kWindowBits = 22;
39// To make decoding faster, we allow the decoder to write 16 bytes ahead in
40// its ringbuffer, therefore the encoder has to decrease max distance by this
41// amount.
42static const int kDecoderRingBufferWriteAheadSlack = 16;
43static const int kMaxBackwardDistance =
44 (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
45
46static const int kMetaBlockSizeBits = 21;
47static const int kRingBufferBits = 23;
48static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
49
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020050template<int kSize>
51double Entropy(const std::vector<Histogram<kSize> >& histograms) {
52 double retval = 0;
53 for (int i = 0; i < histograms.size(); ++i) {
54 retval += histograms[i].EntropyBitCost();
55 }
56 return retval;
57}
58
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010059template<int kSize>
60double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
61 double retval = 0;
62 for (int i = 0; i < histograms.size(); ++i) {
63 retval += PopulationCost(histograms[i]);
64 }
65 return retval;
66}
67
Zoltan Szabadka60c24c02013-12-12 13:18:04 +010068void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
69 if (n == 0) {
70 WriteBits(1, 0, storage_ix, storage);
71 } else {
72 WriteBits(1, 1, storage_ix, storage);
73 int nbits = Log2Floor(n);
74 WriteBits(3, nbits, storage_ix, storage);
75 if (nbits > 0) {
76 WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
77 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020078 }
79}
80
Zoltan Szabadka0454ab42014-02-14 15:04:23 +010081int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
82 // ASCII
83 if ((input[0] & 0x80) == 0) {
84 *symbol = input[0];
85 if (*symbol > 0) {
86 return 1;
87 }
88 }
89 // 2-byte UTF8
90 if (size > 1 &&
91 (input[0] & 0xe0) == 0xc0 &&
92 (input[1] & 0xc0) == 0x80) {
93 *symbol = (((input[0] & 0x1f) << 6) |
94 (input[1] & 0x3f));
95 if (*symbol > 0x7f) {
96 return 2;
97 }
98 }
99 // 3-byte UFT8
100 if (size > 2 &&
101 (input[0] & 0xf0) == 0xe0 &&
102 (input[1] & 0xc0) == 0x80 &&
103 (input[2] & 0xc0) == 0x80) {
104 *symbol = (((input[0] & 0x0f) << 12) |
105 ((input[1] & 0x3f) << 6) |
106 (input[2] & 0x3f));
107 if (*symbol > 0x7ff) {
108 return 3;
109 }
110 }
111 // 4-byte UFT8
112 if (size > 3 &&
113 (input[0] & 0xf8) == 0xf0 &&
114 (input[1] & 0xc0) == 0x80 &&
115 (input[2] & 0xc0) == 0x80 &&
116 (input[3] & 0xc0) == 0x80) {
117 *symbol = (((input[0] & 0x07) << 18) |
118 ((input[1] & 0x3f) << 12) |
119 ((input[2] & 0x3f) << 6) |
120 (input[3] & 0x3f));
121 if (*symbol > 0xffff && *symbol <= 0x10ffff) {
122 return 4;
123 }
124 }
125 // Not UTF8, emit a special symbol above the UTF8-code space
126 *symbol = 0x110000 | input[0];
127 return 1;
128}
129
130// Returns true if at least min_fraction of the data is UTF8-encoded.
131bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
132 size_t size_utf8 = 0;
133 size_t pos = 0;
134 while (pos < length) {
135 int symbol;
136 int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
137 pos += bytes_read;
138 if (symbol < 0x110000) size_utf8 += bytes_read;
139 }
140 return size_utf8 > min_fraction * length;
141}
142
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100143void EncodeMetaBlockLength(size_t meta_block_size,
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100144 bool is_last,
145 bool is_uncompressed,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200146 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100147 WriteBits(1, is_last, storage_ix, storage);
148 if (is_last) {
149 if (meta_block_size == 0) {
150 WriteBits(1, 1, storage_ix, storage);
151 return;
152 }
153 WriteBits(1, 0, storage_ix, storage);
154 }
155 --meta_block_size;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100156 int num_bits = Log2Floor(meta_block_size) + 1;
Zoltan Szabadka8d7081f2013-11-28 17:37:13 +0100157 if (num_bits < 16) {
158 num_bits = 16;
159 }
160 WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100161 while (num_bits > 0) {
162 WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
163 meta_block_size >>= 4;
164 num_bits -= 4;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200165 }
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100166 if (!is_last) {
167 WriteBits(1, is_uncompressed, storage_ix, storage);
168 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200169}
170
171template<int kSize>
172void EntropyEncode(int val, const EntropyCode<kSize>& code,
173 int* storage_ix, uint8_t* storage) {
174 if (code.count_ <= 1) {
175 return;
176 };
177 WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
178}
179
180void StoreHuffmanTreeOfHuffmanTreeToBitMask(
181 const uint8_t* code_length_bitdepth,
182 int* storage_ix, uint8_t* storage) {
183 static const uint8_t kStorageOrder[kCodeLengthCodes] = {
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100184 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200185 };
186 // Throw away trailing zeros:
187 int codes_to_store = kCodeLengthCodes;
Zoltan Szabadka14473452013-12-17 17:17:57 +0100188 for (; codes_to_store > 0; --codes_to_store) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200189 if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
190 break;
191 }
192 }
Zoltan Szabadka14473452013-12-17 17:17:57 +0100193 int num_codes = 0;
194 for (int i = 0; i < codes_to_store; ++i) {
195 if (code_length_bitdepth[kStorageOrder[i]] != 0) {
196 ++num_codes;
197 }
198 }
199 if (num_codes == 1) {
200 codes_to_store = kCodeLengthCodes;
201 }
Zoltan Szabadka2bcd58b2014-01-08 12:28:28 +0100202 int skip_some = 0; // skips none.
203 if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
204 code_length_bitdepth[kStorageOrder[1]] == 0) {
205 skip_some = 2; // skips two.
206 if (code_length_bitdepth[kStorageOrder[2]] == 0) {
207 skip_some = 3; // skips three.
208 }
209 }
210 WriteBits(2, skip_some, storage_ix, storage);
211 for (int i = skip_some; i < codes_to_store; ++i) {
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100212 uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100213 uint8_t bits[] = { 0, 7, 3, 2, 1, 15 };
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100214 int v = code_length_bitdepth[kStorageOrder[i]];
215 WriteBits(len[v], bits[v], storage_ix, storage);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200216 }
217}
218
219void StoreHuffmanTreeToBitMask(
220 const uint8_t* huffman_tree,
221 const uint8_t* huffman_tree_extra_bits,
222 const int huffman_tree_size,
223 const EntropyCode<kCodeLengthCodes>& entropy,
224 int* storage_ix, uint8_t* storage) {
225 for (int i = 0; i < huffman_tree_size; ++i) {
226 const int ix = huffman_tree[i];
227 const int extra_bits = huffman_tree_extra_bits[i];
228 EntropyEncode(ix, entropy, storage_ix, storage);
229 switch (ix) {
230 case 16:
231 WriteBits(2, extra_bits, storage_ix, storage);
232 break;
233 case 17:
234 WriteBits(3, extra_bits, storage_ix, storage);
235 break;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200236 }
237 }
238}
239
240template<int kSize>
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100241void StoreHuffmanCodeSimple(
242 const EntropyCode<kSize>& code, int alphabet_size,
243 int max_bits,
244 int* storage_ix, uint8_t* storage) {
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100245 const uint8_t *depth = &code.depth_[0];
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100246 int symbols[4];
247 // Quadratic sort.
248 int k, j;
249 for (k = 0; k < code.count_; ++k) {
250 symbols[k] = code.symbols_[k];
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100251 }
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100252 for (k = 0; k < code.count_; ++k) {
253 for (j = k + 1; j < code.count_; ++j) {
254 if (depth[symbols[j]] < depth[symbols[k]]) {
255 int t = symbols[k];
256 symbols[k] = symbols[j];
257 symbols[j] = t;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100258 }
259 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200260 }
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100261 // Small tree marker to encode 1-4 symbols.
262 WriteBits(2, 1, storage_ix, storage);
263 WriteBits(2, code.count_ - 1, storage_ix, storage);
264 for (int i = 0; i < code.count_; ++i) {
265 WriteBits(max_bits, symbols[i], storage_ix, storage);
266 }
267 if (code.count_ == 4) {
268 if (depth[symbols[0]] == 2 &&
269 depth[symbols[1]] == 2 &&
270 depth[symbols[2]] == 2 &&
271 depth[symbols[3]] == 2) {
272 WriteBits(1, 0, storage_ix, storage);
273 } else {
274 WriteBits(1, 1, storage_ix, storage);
275 }
276 }
277}
278
279template<int kSize>
280void StoreHuffmanCodeComplex(
281 const EntropyCode<kSize>& code, int alphabet_size,
282 int* storage_ix, uint8_t* storage) {
283 const uint8_t *depth = &code.depth_[0];
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200284 uint8_t huffman_tree[kSize];
285 uint8_t huffman_tree_extra_bits[kSize];
286 int huffman_tree_size = 0;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100287 WriteHuffmanTree(depth,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200288 alphabet_size,
289 &huffman_tree[0],
290 &huffman_tree_extra_bits[0],
291 &huffman_tree_size);
292 Histogram<kCodeLengthCodes> huffman_tree_histogram;
293 memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
294 for (int i = 0; i < huffman_tree_size; ++i) {
295 huffman_tree_histogram.Add(huffman_tree[i]);
296 }
297 EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100298 BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200299 &huffman_tree_entropy);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200300 StoreHuffmanTreeOfHuffmanTreeToBitMask(
301 &huffman_tree_entropy.depth_[0], storage_ix, storage);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200302 StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
303 huffman_tree_size, huffman_tree_entropy,
304 storage_ix, storage);
305}
306
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100307
308template<int kSize>
309void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
310 int* storage_ix, uint8_t* storage) {
311 int max_bits_counter = alphabet_size - 1;
312 int max_bits = 0;
313 while (max_bits_counter) {
314 max_bits_counter >>= 1;
315 ++max_bits;
316 }
317 if (code.count_ == 0) {
318 // Emit a minimal tree for empty cases.
319 // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
320 WriteBits(4 + max_bits, 0x1, storage_ix, storage);
321 } else if (code.count_ <= 4) {
322 StoreHuffmanCodeSimple(
323 code, alphabet_size, max_bits,
324 storage_ix, storage);
325 } else {
326 StoreHuffmanCodeComplex(
327 code, alphabet_size,
328 storage_ix, storage);
329 }
330}
331
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200332template<int kSize>
333void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
334 int alphabet_size,
335 int* storage_ix, uint8_t* storage) {
336 for (int i = 0; i < codes.size(); ++i) {
337 StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
338 }
339}
340
341void EncodeCommand(const Command& cmd,
342 const EntropyCodeCommand& entropy,
343 int* storage_ix, uint8_t* storage) {
344 int code = cmd.command_prefix_;
345 EntropyEncode(code, entropy, storage_ix, storage);
346 if (code >= 128) {
347 code -= 128;
348 }
349 int insert_extra_bits = InsertLengthExtraBits(code);
350 uint64_t insert_extra_bits_val =
351 cmd.insert_length_ - InsertLengthOffset(code);
352 int copy_extra_bits = CopyLengthExtraBits(code);
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800353 uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200354 if (insert_extra_bits > 0) {
355 WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
356 }
357 if (copy_extra_bits > 0) {
358 WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
359 }
360}
361
362void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
363 int* storage_ix, uint8_t* storage) {
364 int code = cmd.distance_prefix_;
365 int extra_bits = cmd.distance_extra_bits_;
366 uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
367 EntropyEncode(code, entropy, storage_ix, storage);
368 if (extra_bits > 0) {
369 WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
370 }
371}
372
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100373void ComputeDistanceShortCodes(std::vector<Command>* cmds,
374 int* dist_ringbuffer,
375 size_t* ringbuffer_idx) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200376 static const int kIndexOffset[16] = {
377 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
378 };
379 static const int kValueOffset[16] = {
380 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
381 };
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200382 for (int i = 0; i < cmds->size(); ++i) {
383 int cur_dist = (*cmds)[i].copy_distance_;
384 if (cur_dist == 0) break;
385 int dist_code = cur_dist + 16;
Zoltan Szabadka14473452013-12-17 17:17:57 +0100386 int limits[16] = { 0, 4, 10, 11,
387 6, 6, 11, 11,
388 11, 11, 11, 11,
389 12, 12, 12, 12 };
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200390 for (int k = 0; k < 16; ++k) {
391 // Only accept more popular choices.
Zoltan Szabadka14473452013-12-17 17:17:57 +0100392 if (cur_dist < limits[k]) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200393 // Typically unpopular ranges, don't replace a short distance
394 // with them.
395 continue;
396 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100397 int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200398 kValueOffset[k]);
399 if (cur_dist == comp) {
400 dist_code = k + 1;
401 break;
402 }
403 }
404 if (dist_code > 1) {
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100405 dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
406 ++(*ringbuffer_idx);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200407 }
408 (*cmds)[i].distance_code_ = dist_code;
409 }
410}
411
412void ComputeCommandPrefixes(std::vector<Command>* cmds,
413 int num_direct_distance_codes,
414 int distance_postfix_bits) {
415 for (int i = 0; i < cmds->size(); ++i) {
416 Command* cmd = &(*cmds)[i];
417 cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800418 cmd->copy_length_code_);
419 if (cmd->copy_length_code_ > 0) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200420 PrefixEncodeCopyDistance(cmd->distance_code_,
421 num_direct_distance_codes,
422 distance_postfix_bits,
423 &cmd->distance_prefix_,
424 &cmd->distance_extra_bits_,
425 &cmd->distance_extra_bits_value_);
426 }
427 if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
428 cmd->distance_prefix_ = 0xffff;
429 } else {
430 cmd->command_prefix_ += 128;
431 }
432 }
433}
434
435int IndexOf(const std::vector<int>& v, int value) {
436 for (int i = 0; i < v.size(); ++i) {
437 if (v[i] == value) return i;
438 }
439 return -1;
440}
441
442void MoveToFront(std::vector<int>* v, int index) {
443 int value = (*v)[index];
444 for (int i = index; i > 0; --i) {
445 (*v)[i] = (*v)[i - 1];
446 }
447 (*v)[0] = value;
448}
449
450std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
451 if (v.empty()) return v;
452 std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
453 for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
454 std::vector<int> result(v.size());
455 for (int i = 0; i < v.size(); ++i) {
456 int index = IndexOf(mtf, v[i]);
457 result[i] = index;
458 MoveToFront(&mtf, index);
459 }
460 return result;
461}
462
463// Finds runs of zeros in v_in and replaces them with a prefix code of the run
464// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
465// shifted by *max_length_prefix. Will not create prefix codes bigger than the
466// initial value of *max_run_length_prefix. The prefix code of run length L is
467// simply Log2Floor(L) and the number of extra bits is the same as the prefix
468// code.
469void RunLengthCodeZeros(const std::vector<int>& v_in,
470 int* max_run_length_prefix,
471 std::vector<int>* v_out,
472 std::vector<int>* extra_bits) {
473 int max_reps = 0;
474 for (int i = 0; i < v_in.size();) {
475 for (; i < v_in.size() && v_in[i] != 0; ++i) ;
476 int reps = 0;
477 for (; i < v_in.size() && v_in[i] == 0; ++i) {
478 ++reps;
479 }
480 max_reps = std::max(reps, max_reps);
481 }
482 int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
483 *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
484 for (int i = 0; i < v_in.size();) {
485 if (v_in[i] != 0) {
486 v_out->push_back(v_in[i] + *max_run_length_prefix);
487 extra_bits->push_back(0);
488 ++i;
489 } else {
490 int reps = 1;
491 for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
492 ++reps;
493 }
494 i += reps;
495 while (reps) {
496 if (reps < (2 << *max_run_length_prefix)) {
497 int run_length_prefix = Log2Floor(reps);
498 v_out->push_back(run_length_prefix);
499 extra_bits->push_back(reps - (1 << run_length_prefix));
500 break;
501 } else {
502 v_out->push_back(*max_run_length_prefix);
503 extra_bits->push_back((1 << *max_run_length_prefix) - 1);
504 reps -= (2 << *max_run_length_prefix) - 1;
505 }
506 }
507 }
508 }
509}
510
511// Returns a maximum zero-run-length-prefix value such that run-length coding
512// zeros in v with this maximum prefix value and then encoding the resulting
513// histogram and entropy-coding v produces the least amount of bits.
514int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
515 int min_cost = std::numeric_limits<int>::max();
516 int best_max_prefix = 0;
517 for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
518 std::vector<int> rle_symbols;
519 std::vector<int> extra_bits;
520 int max_run_length_prefix = max_prefix;
521 RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
522 if (max_run_length_prefix < max_prefix) break;
523 HistogramLiteral histogram;
524 for (int i = 0; i < rle_symbols.size(); ++i) {
525 histogram.Add(rle_symbols[i]);
526 }
527 int bit_cost = PopulationCost(histogram);
528 if (max_prefix > 0) {
529 bit_cost += 4;
530 }
531 for (int i = 1; i <= max_prefix; ++i) {
532 bit_cost += histogram.data_[i] * i; // extra bits
533 }
534 if (bit_cost < min_cost) {
535 min_cost = bit_cost;
536 best_max_prefix = max_prefix;
537 }
538 }
539 return best_max_prefix;
540}
541
542void EncodeContextMap(const std::vector<int>& context_map,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200543 int num_clusters,
544 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100545 EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200546
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800547 if (num_clusters == 1) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200548 return;
549 }
550
551 std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
552 std::vector<int> rle_symbols;
553 std::vector<int> extra_bits;
554 int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
555 RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
556 &rle_symbols, &extra_bits);
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100557 HistogramContextMap symbol_histogram;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200558 for (int i = 0; i < rle_symbols.size(); ++i) {
559 symbol_histogram.Add(rle_symbols[i]);
560 }
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100561 EntropyCodeContextMap symbol_code;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200562 BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
563 &symbol_code);
564 bool use_rle = max_run_length_prefix > 0;
565 WriteBits(1, use_rle, storage_ix, storage);
566 if (use_rle) {
567 WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
568 }
569 StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
570 storage_ix, storage);
571 for (int i = 0; i < rle_symbols.size(); ++i) {
572 EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
573 if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
574 WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
575 }
576 }
577 WriteBits(1, 1, storage_ix, storage); // use move-to-front
578}
579
580template<int kSize>
581void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
582 int alphabet_size,
583 std::vector<EntropyCode<kSize> >* entropy_codes) {
584 entropy_codes->resize(histograms.size());
585 for (int i = 0; i < histograms.size(); ++i) {
586 BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
587 }
588}
589
590struct BlockSplitCode {
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100591 EntropyCodeBlockType block_type_code;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200592 EntropyCodeBlockLength block_len_code;
593};
594
595void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
596 int length,
597 int* storage_ix, uint8_t* storage) {
598 int len_code = BlockLengthPrefix(length);
599 int extra_bits = BlockLengthExtraBits(len_code);
600 int extra_bits_value = length - BlockLengthOffset(len_code);
601 EntropyEncode(len_code, entropy, storage_ix, storage);
602
603 if (extra_bits > 0) {
604 WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
605 }
606}
607
608void ComputeBlockTypeShortCodes(BlockSplit* split) {
609 if (split->num_types_ <= 1) {
610 split->num_types_ = 1;
611 return;
612 }
613 int ringbuffer[2] = { 0, 1 };
614 size_t index = 0;
615 for (int i = 0; i < split->types_.size(); ++i) {
616 int type = split->types_[i];
617 int type_code;
618 if (type == ringbuffer[index & 1]) {
619 type_code = 0;
620 } else if (type == ringbuffer[(index - 1) & 1] + 1) {
621 type_code = 1;
622 } else {
623 type_code = type + 2;
624 }
625 ringbuffer[index & 1] = type;
626 ++index;
627 split->type_codes_.push_back(type_code);
628 }
629}
630
631void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
632 BlockSplitCode* code,
633 int* storage_ix, uint8_t* storage) {
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100634 EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
635 if (split.num_types_ == 1) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200636 return;
637 }
Zoltan Szabadka8d7081f2013-11-28 17:37:13 +0100638
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100639 HistogramBlockType type_histo;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200640 for (int i = 0; i < split.type_codes_.size(); ++i) {
641 type_histo.Add(split.type_codes_[i]);
642 }
643 BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
644 &code->block_type_code);
645 HistogramBlockLength length_histo;
646 for (int i = 0; i < split.lengths_.size(); ++i) {
647 length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
648 }
649 BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
650 &code->block_len_code);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200651 StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
652 storage_ix, storage);
653 StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
654 storage_ix, storage);
655 EncodeBlockLength(code->block_len_code, split.lengths_[0],
656 storage_ix, storage);
657}
658
659void MoveAndEncode(const BlockSplitCode& code,
660 BlockSplitIterator* it,
661 int* storage_ix, uint8_t* storage) {
662 if (it->length_ == 0) {
663 ++it->idx_;
664 it->type_ = it->split_.types_[it->idx_];
665 it->length_ = it->split_.lengths_[it->idx_];
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100666 int type_code = it->split_.type_codes_[it->idx_];
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200667 EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
668 EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
669 }
670 --it->length_;
671}
672
673struct EncodingParams {
674 int num_direct_distance_codes;
675 int distance_postfix_bits;
676 int literal_context_mode;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200677};
678
679struct MetaBlock {
680 std::vector<Command> cmds;
681 EncodingParams params;
682 BlockSplit literal_split;
683 BlockSplit command_split;
684 BlockSplit distance_split;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100685 std::vector<int> literal_context_modes;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200686 std::vector<int> literal_context_map;
687 std::vector<int> distance_context_map;
688 std::vector<HistogramLiteral> literal_histograms;
689 std::vector<HistogramCommand> command_histograms;
690 std::vector<HistogramDistance> distance_histograms;
691};
692
693void BuildMetaBlock(const EncodingParams& params,
694 const std::vector<Command>& cmds,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100695 const uint8_t* ringbuffer,
696 const size_t pos,
697 const size_t mask,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200698 MetaBlock* mb) {
699 mb->cmds = cmds;
700 mb->params = params;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100701 if (cmds.empty()) {
702 return;
703 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200704 ComputeCommandPrefixes(&mb->cmds,
705 mb->params.num_direct_distance_codes,
706 mb->params.distance_postfix_bits);
707 SplitBlock(mb->cmds,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100708 &ringbuffer[pos & mask],
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200709 &mb->literal_split,
710 &mb->command_split,
711 &mb->distance_split);
712 ComputeBlockTypeShortCodes(&mb->literal_split);
713 ComputeBlockTypeShortCodes(&mb->command_split);
714 ComputeBlockTypeShortCodes(&mb->distance_split);
715
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100716 mb->literal_context_modes.resize(mb->literal_split.num_types_,
717 mb->params.literal_context_mode);
718
719
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200720 int num_literal_contexts =
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100721 mb->literal_split.num_types_ << kLiteralContextBits;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200722 int num_distance_contexts =
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100723 mb->distance_split.num_types_ << kDistanceContextBits;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200724 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
725 mb->command_histograms.resize(mb->command_split.num_types_);
726 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
727 BuildHistograms(mb->cmds,
728 mb->literal_split,
729 mb->command_split,
730 mb->distance_split,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100731 ringbuffer,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200732 pos,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100733 mask,
734 mb->literal_context_modes,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200735 &literal_histograms,
736 &mb->command_histograms,
737 &distance_histograms);
738
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100739 // Histogram ids need to fit in one byte.
740 static const int kMaxNumberOfHistograms = 256;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200741
742 mb->literal_histograms = literal_histograms;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100743 ClusterHistograms(literal_histograms,
744 1 << kLiteralContextBits,
745 mb->literal_split.num_types_,
746 kMaxNumberOfHistograms,
747 &mb->literal_histograms,
748 &mb->literal_context_map);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200749
750 mb->distance_histograms = distance_histograms;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100751 ClusterHistograms(distance_histograms,
752 1 << kDistanceContextBits,
753 mb->distance_split.num_types_,
754 kMaxNumberOfHistograms,
755 &mb->distance_histograms,
756 &mb->distance_context_map);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200757}
758
759size_t MetaBlockLength(const std::vector<Command>& cmds) {
760 size_t length = 0;
761 for (int i = 0; i < cmds.size(); ++i) {
762 const Command& cmd = cmds[i];
763 length += cmd.insert_length_ + cmd.copy_length_;
764 }
765 return length;
766}
767
768void StoreMetaBlock(const MetaBlock& mb,
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100769 const bool is_last,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100770 const uint8_t* ringbuffer,
771 const size_t mask,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200772 size_t* pos,
773 int* storage_ix, uint8_t* storage) {
774 size_t length = MetaBlockLength(mb.cmds);
775 const size_t end_pos = *pos + length;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100776 EncodeMetaBlockLength(length,
777 is_last,
778 false,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200779 storage_ix, storage);
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100780 if (length == 0) {
781 return;
782 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200783 BlockSplitCode literal_split_code;
784 BlockSplitCode command_split_code;
785 BlockSplitCode distance_split_code;
786 BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
787 storage_ix, storage);
788 BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
789 storage_ix, storage);
790 BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
791 storage_ix, storage);
792 WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
793 WriteBits(4,
794 mb.params.num_direct_distance_codes >>
795 mb.params.distance_postfix_bits, storage_ix, storage);
796 int num_distance_codes =
797 kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
798 (48 << mb.params.distance_postfix_bits);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100799 for (int i = 0; i < mb.literal_split.num_types_; ++i) {
800 WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
801 }
802 EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
803 EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200804 std::vector<EntropyCodeLiteral> literal_codes;
805 std::vector<EntropyCodeCommand> command_codes;
806 std::vector<EntropyCodeDistance> distance_codes;
807 BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
808 BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
809 &command_codes);
810 BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
811 &distance_codes);
812 StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
813 StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
814 StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
815 BlockSplitIterator literal_it(mb.literal_split);
816 BlockSplitIterator command_it(mb.command_split);
817 BlockSplitIterator distance_it(mb.distance_split);
818 for (int i = 0; i < mb.cmds.size(); ++i) {
819 const Command& cmd = mb.cmds[i];
820 MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
821 EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
822 for (int j = 0; j < cmd.insert_length_; ++j) {
823 MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
824 int histogram_idx = literal_it.type_;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100825 uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
826 uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
827 int context = ((literal_it.type_ << kLiteralContextBits) +
828 Context(prev_byte, prev_byte2,
829 mb.literal_context_modes[literal_it.type_]));
830 histogram_idx = mb.literal_context_map[context];
831 EntropyEncode(ringbuffer[*pos & mask],
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200832 literal_codes[histogram_idx], storage_ix, storage);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100833 ++(*pos);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200834 }
835 if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
836 MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100837 int context = (distance_it.type_ << 2) +
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800838 ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
839 int histogram_index = mb.distance_context_map[context];
840 size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200841 EncodeCopyDistance(cmd, distance_codes[histogram_index],
842 storage_ix, storage);
843 }
844 *pos += cmd.copy_length_;
845 }
846}
847
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100848BrotliCompressor::BrotliCompressor()
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800849 : window_bits_(kWindowBits),
850 hasher_(new Hasher),
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100851 dist_ringbuffer_idx_(0),
852 input_pos_(0),
853 ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
854 literal_cost_(1 << kRingBufferBits),
855 storage_ix_(0),
856 storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
Zoltan Szabadka8d7081f2013-11-28 17:37:13 +0100857 dist_ringbuffer_[0] = 16;
858 dist_ringbuffer_[1] = 15;
859 dist_ringbuffer_[2] = 11;
860 dist_ringbuffer_[3] = 4;
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800861 storage_[0] = 0;
Zoltan Szabadka2f268ad2014-02-17 14:25:36 +0100862 StoreDictionaryWordHashes();
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800863}
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100864
865BrotliCompressor::~BrotliCompressor() {
866 delete hasher_;
867 delete[] storage_;
868}
869
Zoltan Szabadka2f268ad2014-02-17 14:25:36 +0100870void BrotliCompressor::StoreDictionaryWordHashes() {
871 for (int t = kNumTransforms - 1; t >= 0; --t) {
872 for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
873 const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
874 for (int j = num_words - 1; j >= 0; --j) {
875 int word_id = t * num_words + j;
876 std::string word = GetTransformedDictionaryWord(i, word_id);
877 if (word.size() >= 3) {
878 hasher_->Store(reinterpret_cast<const uint8_t*>(&word[0]),
879 (-1) * ((i << 20) + word_id + 1));
880 }
881 }
882 }
883 }
884}
885
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100886void BrotliCompressor::WriteStreamHeader() {
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100887 // Encode window size.
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800888 if (window_bits_ == 16) {
889 WriteBits(1, 0, &storage_ix_, storage_);
890 } else {
891 WriteBits(1, 1, &storage_ix_, storage_);
892 WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
893 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100894}
895
896void BrotliCompressor::WriteMetaBlock(const size_t input_size,
897 const uint8_t* input_buffer,
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100898 const bool is_last,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100899 size_t* encoded_size,
900 uint8_t* encoded_buffer) {
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100901 static const double kMinUTF8Ratio = 0.75;
902 bool utf8_mode = false;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100903 std::vector<Command> commands;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100904 if (input_size > 0) {
905 ringbuffer_.Write(input_buffer, input_size);
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100906 utf8_mode = IsMostlyUTF8(
907 &ringbuffer_.start()[input_pos_ & kRingBufferMask],
908 input_size, kMinUTF8Ratio);
909 if (utf8_mode) {
910 EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
911 kRingBufferMask, ringbuffer_.start(),
912 &literal_cost_[0]);
913 } else {
914 EstimateBitCostsForLiterals(input_pos_, input_size,
915 kRingBufferMask, ringbuffer_.start(),
916 &literal_cost_[0]);
917 }
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100918 CreateBackwardReferences(input_size, input_pos_,
919 ringbuffer_.start(),
920 &literal_cost_[0],
921 kRingBufferMask, kMaxBackwardDistance,
922 hasher_,
923 &commands);
924 ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
925 &dist_ringbuffer_idx_);
926 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100927 EncodingParams params;
928 params.num_direct_distance_codes = 12;
929 params.distance_postfix_bits = 1;
930 params.literal_context_mode = CONTEXT_SIGNED;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100931 const int storage_ix0 = storage_ix_;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100932 MetaBlock mb;
933 BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
934 kRingBufferMask, &mb);
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100935 StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100936 &input_pos_, &storage_ix_, storage_);
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100937 size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
938 if (input_size + 4 < output_size) {
939 storage_ix_ = storage_ix0;
940 storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
941 EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
942 size_t hdr_size = (storage_ix_ + 7) >> 3;
943 memcpy(encoded_buffer, storage_, hdr_size);
944 memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
945 *encoded_size = hdr_size + input_size;
946 if (is_last) {
947 encoded_buffer[*encoded_size] = 0x3; // ISLAST, ISEMPTY
948 ++(*encoded_size);
949 }
950 storage_ix_ = 0;
951 storage_[0] = 0;
952 } else {
953 memcpy(encoded_buffer, storage_, output_size);
954 *encoded_size = output_size;
955 if (is_last) {
956 storage_ix_ = 0;
957 storage_[0] = 0;
958 } else {
959 storage_ix_ -= output_size << 3;
960 storage_[storage_ix_ >> 3] = storage_[output_size];
961 }
962 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100963}
964
965void BrotliCompressor::FinishStream(
966 size_t* encoded_size, uint8_t* encoded_buffer) {
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100967 WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100968}
969
970
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200971int BrotliCompressBuffer(size_t input_size,
972 const uint8_t* input_buffer,
973 size_t* encoded_size,
974 uint8_t* encoded_buffer) {
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200975 if (input_size == 0) {
Zoltan Szabadka8d7081f2013-11-28 17:37:13 +0100976 encoded_buffer[0] = 6;
977 *encoded_size = 1;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200978 return 1;
979 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200980
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100981 BrotliCompressor compressor;
982 compressor.WriteStreamHeader();
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200983
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100984 const int max_block_size = 1 << kMetaBlockSizeBits;
985 size_t max_output_size = *encoded_size;
986 const uint8_t* input_end = input_buffer + input_size;
987 *encoded_size = 0;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200988
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100989 while (input_buffer < input_end) {
990 int block_size = max_block_size;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100991 bool is_last = false;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100992 if (block_size >= input_end - input_buffer) {
993 block_size = input_end - input_buffer;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100994 is_last = true;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100995 }
996 size_t output_size = max_output_size;
Zoltan Szabadka60c24c02013-12-12 13:18:04 +0100997 compressor.WriteMetaBlock(block_size, input_buffer, is_last,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100998 &output_size, &encoded_buffer[*encoded_size]);
999 input_buffer += block_size;
1000 *encoded_size += output_size;
1001 max_output_size -= output_size;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +02001002 }
1003
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +02001004 return 1;
1005}
1006
1007} // namespace brotli