blob: ce09c54f346e76f9de2725528a06de6abb8e3a36 [file] [log] [blame]
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +02001// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Function to find backward reference copies.
16
17#include "./backward_references.h"
18
19#include <algorithm>
Zoltan Szabadka95ddb482015-06-29 14:20:25 +020020#include <limits>
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020021#include <vector>
22
23#include "./command.h"
Zoltan Szabadka95ddb482015-06-29 14:20:25 +020024#include "./fast_log.h"
Zoltan Szabadka4c375662015-10-01 15:10:42 +020025#include "./literal_cost.h"
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020026
27namespace brotli {
28
Zoltan Szabadka95ddb482015-06-29 14:20:25 +020029static const double kInfinity = std::numeric_limits<double>::infinity();
Zoltan Szabadkab3d37232015-06-12 16:25:41 +020030
31// Histogram based cost model for zopflification.
32class ZopfliCostModel {
33 public:
Zoltan Szabadka99aae452015-10-05 11:43:31 +020034 ZopfliCostModel() : min_cost_cmd_(kInfinity) {}
35
Zoltan Szabadkab3d37232015-06-12 16:25:41 +020036 void SetFromCommands(size_t num_bytes,
37 size_t position,
38 const uint8_t* ringbuffer,
39 size_t ringbuffer_mask,
40 const Command* commands,
41 int num_commands,
42 int last_insert_len) {
43 std::vector<int> histogram_literal(256, 0);
44 std::vector<int> histogram_cmd(kNumCommandPrefixes, 0);
45 std::vector<int> histogram_dist(kNumDistancePrefixes, 0);
46
47 size_t pos = position - last_insert_len;
48 for (int i = 0; i < num_commands; i++) {
49 int inslength = commands[i].insert_len_;
50 int copylength = commands[i].copy_len_;
51 int distcode = commands[i].dist_prefix_;
52 int cmdcode = commands[i].cmd_prefix_;
53
54 histogram_cmd[cmdcode]++;
55 if (cmdcode >= 128) histogram_dist[distcode]++;
56
57 for (int j = 0; j < inslength; j++) {
58 histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
59 }
60
61 pos += inslength + copylength;
62 }
63
64 std::vector<double> cost_literal;
65 Set(histogram_literal, &cost_literal);
66 Set(histogram_cmd, &cost_cmd_);
67 Set(histogram_dist, &cost_dist_);
68
Zoltan Szabadkab3d37232015-06-12 16:25:41 +020069 for (int i = 0; i < kNumCommandPrefixes; ++i) {
70 min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
71 }
72
73 literal_costs_.resize(num_bytes + 1);
74 literal_costs_[0] = 0.0;
75 for (int i = 0; i < num_bytes; ++i) {
76 literal_costs_[i + 1] = literal_costs_[i] +
77 cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
78 }
79 }
80
81 void SetFromLiteralCosts(size_t num_bytes,
82 size_t position,
Zoltan Szabadka4c375662015-10-01 15:10:42 +020083 const uint8_t* ringbuffer,
84 size_t ringbuffer_mask) {
Zoltan Szabadka754deae2015-10-01 17:08:59 +020085 std::vector<float> literal_cost(num_bytes + 1);
Zoltan Szabadka4c375662015-10-01 15:10:42 +020086 EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
87 ringbuffer, &literal_cost[0]);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +020088 literal_costs_.resize(num_bytes + 1);
89 literal_costs_[0] = 0.0;
Zoltan Szabadka4c375662015-10-01 15:10:42 +020090 for (int i = 0; i < num_bytes; ++i) {
91 literal_costs_[i + 1] = literal_costs_[i] + literal_cost[i];
Zoltan Szabadkab3d37232015-06-12 16:25:41 +020092 }
93 cost_cmd_.resize(kNumCommandPrefixes);
94 cost_dist_.resize(kNumDistancePrefixes);
95 for (int i = 0; i < kNumCommandPrefixes; ++i) {
96 cost_cmd_[i] = FastLog2(11 + i);
97 }
98 for (int i = 0; i < kNumDistancePrefixes; ++i) {
99 cost_dist_[i] = FastLog2(20 + i);
100 }
101 min_cost_cmd_ = FastLog2(11);
102 }
103
104 double GetCommandCost(
105 int dist_code, int length_code, int insert_length) const {
106 int inscode = GetInsertLengthCode(insert_length);
107 int copycode = GetCopyLengthCode(length_code);
108 uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code);
109 uint64_t insnumextra = insextra[inscode];
110 uint64_t copynumextra = copyextra[copycode];
111 uint16_t dist_symbol;
112 uint32_t distextra;
Lode Vandevenne6511d6b2015-08-28 16:09:23 +0200113 PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200114 uint32_t distnumextra = distextra >> 24;
115
116 double result = insnumextra + copynumextra + distnumextra;
117 result += cost_cmd_[cmdcode];
118 if (cmdcode >= 128) result += cost_dist_[dist_symbol];
119 return result;
120 }
121
122 double GetLiteralCosts(int from, int to) const {
123 return literal_costs_[to] - literal_costs_[from];
124 }
125
126 double GetMinCostCmd() const {
127 return min_cost_cmd_;
128 }
129
130 private:
131 void Set(const std::vector<int>& histogram, std::vector<double>* cost) {
132 cost->resize(histogram.size());
Zoltan Szabadka95ddb482015-06-29 14:20:25 +0200133 int sum = 0;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200134 for (size_t i = 0; i < histogram.size(); i++) {
135 sum += histogram[i];
136 }
Zoltan Szabadka95ddb482015-06-29 14:20:25 +0200137 double log2sum = FastLog2(sum);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200138 for (size_t i = 0; i < histogram.size(); i++) {
139 if (histogram[i] == 0) {
Zoltan Szabadka95ddb482015-06-29 14:20:25 +0200140 (*cost)[i] = log2sum + 2;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200141 continue;
142 }
143
144 // Shannon bits for this symbol.
Zoltan Szabadka95ddb482015-06-29 14:20:25 +0200145 (*cost)[i] = log2sum - FastLog2(histogram[i]);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200146
147 // Cannot be coded with less than 1 bit
148 if ((*cost)[i] < 1) (*cost)[i] = 1;
149 }
150 }
151
152 std::vector<double> cost_cmd_; // The insert and copy length symbols.
153 std::vector<double> cost_dist_;
154 // Cumulative costs of literals per position in the stream.
155 std::vector<double> literal_costs_;
156 double min_cost_cmd_;
157};
158
159inline void SetDistanceCache(int distance,
160 int distance_code,
161 int max_distance,
162 const int* dist_cache,
163 int* result_dist_cache) {
164 if (distance <= max_distance && distance_code > 0) {
165 result_dist_cache[0] = distance;
166 memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
167 } else {
168 memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
169 }
170}
171
172inline int ComputeDistanceCode(int distance,
173 int max_distance,
174 int quality,
175 const int* dist_cache) {
176 if (distance <= max_distance) {
177 if (distance == dist_cache[0]) {
178 return 0;
179 } else if (distance == dist_cache[1]) {
180 return 1;
181 } else if (distance == dist_cache[2]) {
182 return 2;
183 } else if (distance == dist_cache[3]) {
184 return 3;
185 } else if (quality > 3 && distance >= 6) {
186 for (int k = 4; k < kNumDistanceShortCodes; ++k) {
187 int idx = kDistanceCacheIndex[k];
188 int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
189 static const int kLimits[16] = { 0, 0, 0, 0,
190 6, 6, 11, 11,
191 11, 11, 11, 11,
192 12, 12, 12, 12 };
193 if (distance == candidate && distance >= kLimits[k]) {
194 return k;
195 }
196 }
197 }
198 }
199 return distance + 15;
200}
201
202struct ZopfliNode {
203 ZopfliNode() : length(1),
204 distance(0),
205 distance_code(0),
206 length_code(0),
207 insert_length(0),
208 cost(kInfinity) {}
209
210 // best length to get up to this byte (not including this byte itself)
211 int length;
212 // distance associated with the length
213 int distance;
214 int distance_code;
215 int distance_cache[4];
216 // length code associated with the length - usually the same as length,
217 // except in case of length-changing dictionary transformation.
218 int length_code;
219 // number of literal inserts before this copy
220 int insert_length;
221 // smallest cost to get to this byte from the beginning, as found so far
222 double cost;
223};
224
225inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
226 int len, int len_code, int dist, int dist_code,
227 int max_dist, const int* dist_cache,
228 double cost) {
229 ZopfliNode& next = nodes[pos + len];
230 next.length = len;
231 next.length_code = len_code;
232 next.distance = dist;
233 next.distance_code = dist_code;
234 next.insert_length = pos - start_pos;
235 next.cost = cost;
236 SetDistanceCache(dist, dist_code, max_dist, dist_cache,
237 &next.distance_cache[0]);
238}
239
240// Maintains the smallest 2^k cost difference together with their positions
241class StartPosQueue {
242 public:
243 explicit StartPosQueue(int bits)
244 : mask_((1 << bits) - 1), q_(1 << bits), idx_(0) {}
245
246 void Clear() {
247 idx_ = 0;
248 }
249
250 void Push(size_t pos, double costdiff) {
Lode Vandevenne17ed2582015-08-10 13:13:58 +0200251 if (costdiff == kInfinity) {
252 // We can't start a command from an unreachable start position.
253 // E.g. position 1 in a stream is always unreachable, because all commands
254 // have a copy of at least length 2.
255 return;
256 }
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200257 q_[idx_ & mask_] = std::make_pair(pos, costdiff);
258 // Restore the sorted order.
259 for (int i = idx_; i > 0 && i > idx_ - mask_; --i) {
260 if (q_[i & mask_].second > q_[(i - 1) & mask_].second) {
261 std::swap(q_[i & mask_], q_[(i - 1) & mask_]);
262 }
263 }
264 ++idx_;
265 }
266
267 int size() const { return std::min<int>(idx_, mask_ + 1); }
268
269 size_t GetStartPos(int k) const {
270 return q_[(idx_ - k - 1) & mask_].first;
271 }
272
273 private:
274 const int mask_;
275 std::vector<std::pair<size_t, double> > q_;
276 int idx_;
277};
278
279// Returns the minimum possible copy length that can improve the cost of any
280// future position.
281int ComputeMinimumCopyLength(const StartPosQueue& queue,
282 const std::vector<ZopfliNode>& nodes,
283 const ZopfliCostModel& model,
284 size_t pos,
285 double min_cost_cmd) {
286 // Compute the minimum possible cost of reaching any future position.
287 const size_t start0 = queue.GetStartPos(0);
288 double min_cost = (nodes[start0].cost +
289 model.GetLiteralCosts(start0, pos) +
290 min_cost_cmd);
291 int len = 2;
292 int next_len_bucket = 4;
293 int next_len_offset = 10;
294 while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
295 // We already reached (pos + len) with no more cost than the minimum
296 // possible cost of reaching anything from this pos, so there is no point in
297 // looking for lengths <= len.
298 ++len;
299 if (len == next_len_offset) {
300 // We reached the next copy length code bucket, so we add one more
301 // extra bit to the minimum cost.
302 min_cost += 1.0;
303 next_len_offset += next_len_bucket;
304 next_len_bucket *= 2;
305 }
306 }
307 return len;
308}
309
310void ZopfliIterate(size_t num_bytes,
311 size_t position,
312 const uint8_t* ringbuffer,
313 size_t ringbuffer_mask,
314 const size_t max_backward_limit,
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200315 const ZopfliCostModel& model,
316 const std::vector<int>& num_matches,
317 const std::vector<BackwardMatch>& matches,
318 int* dist_cache,
319 int* last_insert_len,
320 Command* commands,
321 int* num_commands,
322 int* num_literals) {
323 const Command * const orig_commands = commands;
324
325 std::vector<ZopfliNode> nodes(num_bytes + 1);
326 nodes[0].length = 0;
327 nodes[0].cost = 0;
328 memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0]));
329
330 StartPosQueue queue(3);
331 const double min_cost_cmd = model.GetMinCostCmd();
332
333 size_t cur_match_pos = 0;
334 for (size_t i = 0; i + 3 < num_bytes; i++) {
335 size_t cur_ix = position + i;
336 size_t cur_ix_masked = cur_ix & ringbuffer_mask;
337 size_t max_distance = std::min(cur_ix, max_backward_limit);
338 int max_length = num_bytes - i;
339
340 queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
341
342 const int min_len = ComputeMinimumCopyLength(queue, nodes, model,
343 i, min_cost_cmd);
344
345 // Go over the command starting positions in order of increasing cost
346 // difference.
347 for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
348 const size_t start = queue.GetStartPos(k);
349 const double start_costdiff =
350 nodes[start].cost - model.GetLiteralCosts(0, start);
351 const int* dist_cache2 = &nodes[start].distance_cache[0];
352
353 // Look for last distance matches using the distance cache from this
354 // starting position.
355 int best_len = min_len - 1;
356 for (int j = 0; j < kNumDistanceShortCodes; ++j) {
357 const int idx = kDistanceCacheIndex[j];
358 const int backward = dist_cache2[idx] + kDistanceCacheOffset[j];
359 size_t prev_ix = cur_ix - backward;
360 if (prev_ix >= cur_ix) {
361 continue;
362 }
363 if (PREDICT_FALSE(backward > max_distance)) {
364 continue;
365 }
366 prev_ix &= ringbuffer_mask;
367
368 if (cur_ix_masked + best_len > ringbuffer_mask ||
369 prev_ix + best_len > ringbuffer_mask ||
370 ringbuffer[cur_ix_masked + best_len] !=
371 ringbuffer[prev_ix + best_len]) {
372 continue;
373 }
374 const size_t len =
375 FindMatchLengthWithLimit(&ringbuffer[prev_ix],
376 &ringbuffer[cur_ix_masked],
377 max_length);
378 for (int l = best_len + 1; l <= len; ++l) {
379 double cmd_cost = model.GetCommandCost(j, l, i - start);
380 double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
381 if (cost < nodes[i + l].cost) {
382 UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
383 max_distance, dist_cache2, cost);
384 }
385 best_len = l;
386 }
387 }
388
389 // At higher iterations look only for new last distance matches, since
390 // looking only for new command start positions with the same distances
391 // does not help much.
392 if (k >= 2) continue;
393
394 // Loop through all possible copy lengths at this position.
395 int len = min_len;
396 for (int j = 0; j < num_matches[i]; ++j) {
397 BackwardMatch match = matches[cur_match_pos + j];
398 int dist = match.distance;
399 bool is_dictionary_match = dist > max_distance;
400 // We already tried all possible last distance matches, so we can use
401 // normal distance code here.
402 int dist_code = dist + 15;
403 // Try all copy lengths up until the maximum copy length corresponding
404 // to this distance. If the distance refers to the static dictionary, or
405 // the maximum length is long enough, try only one maximum length.
406 int max_len = match.length();
407 if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
408 len = max_len;
409 }
410 for (; len <= max_len; ++len) {
411 int len_code = is_dictionary_match ? match.length_code() : len;
412 double cmd_cost =
413 model.GetCommandCost(dist_code, len_code, i - start);
414 double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
415 if (cost < nodes[i + len].cost) {
416 UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
417 dist_code, max_distance, dist_cache2, cost);
418 }
419 }
420 }
421 }
422
423 cur_match_pos += num_matches[i];
424
425 // The zopflification can be too slow in case of very long lengths, so in
426 // such case skip it all, it does not cost a lot of compression ratio.
427 if (num_matches[i] == 1 &&
428 matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
429 i += matches[cur_match_pos - 1].length() - 1;
430 queue.Clear();
431 }
432 }
433
434 std::vector<int> backwards;
435 size_t index = num_bytes;
436 while (nodes[index].cost == kInfinity) --index;
437 while (index > 0) {
438 int len = nodes[index].length + nodes[index].insert_length;
439 backwards.push_back(len);
440 index -= len;
441 }
442
443 std::vector<int> path;
444 for (size_t i = backwards.size(); i > 0; i--) {
445 path.push_back(backwards[i - 1]);
446 }
447
448 size_t pos = 0;
449 for (size_t i = 0; i < path.size(); i++) {
450 const ZopfliNode& next = nodes[pos + path[i]];
451 int copy_length = next.length;
452 int insert_length = next.insert_length;
453 pos += insert_length;
454 if (i == 0) {
455 insert_length += *last_insert_len;
Eugene Klyuchnikovb58317a2015-09-01 12:18:41 +0200456 *last_insert_len = 0;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200457 }
458 int distance = next.distance;
459 int len_code = next.length_code;
460 size_t max_distance = std::min(position + pos, max_backward_limit);
461 bool is_dictionary = (distance > max_distance);
462 int dist_code = next.distance_code;
463
464 Command cmd(insert_length, copy_length, len_code, dist_code);
465 *commands++ = cmd;
466
467 if (!is_dictionary && dist_code > 0) {
468 dist_cache[3] = dist_cache[2];
469 dist_cache[2] = dist_cache[1];
470 dist_cache[1] = dist_cache[0];
471 dist_cache[0] = distance;
472 }
473
474 *num_literals += insert_length;
475 insert_length = 0;
476 pos += copy_length;
477 }
Eugene Klyuchnikovb58317a2015-09-01 12:18:41 +0200478 *last_insert_len += num_bytes - pos;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200479 *num_commands += (commands - orig_commands);
480}
481
482template<typename Hasher>
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100483void CreateBackwardReferences(size_t num_bytes,
484 size_t position,
485 const uint8_t* ringbuffer,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100486 size_t ringbuffer_mask,
487 const size_t max_backward_limit,
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100488 const int quality,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100489 Hasher* hasher,
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100490 int* dist_cache,
491 int* last_insert_len,
492 Command* commands,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200493 int* num_commands,
494 int* num_literals) {
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100495 if (num_bytes >= 3 && position >= 3) {
496 // Prepare the hashes for three last bytes of the last write.
497 // These could not be calculated before, since they require knowledge
498 // of both the previous and the current block.
499 hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
500 position - 3);
501 hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
502 position - 2);
503 hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
504 position - 1);
505 }
506 const Command * const orig_commands = commands;
507 int insert_length = *last_insert_len;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100508 size_t i = position & ringbuffer_mask;
Zoltan Szabadkaa89b57b2015-10-26 17:08:57 +0100509 const size_t i_diff = position - i;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100510 const size_t i_end = i + num_bytes;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200511
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100512 // For speed up heuristics for random data.
513 const int random_heuristics_window_size = quality < 9 ? 64 : 512;
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100514 int apply_random_heuristics = i + random_heuristics_window_size;
515
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200516 // Minimum score to accept a backward reference.
517 const int kMinScore = 4.0;
518
Lode Vandevenne6511d6b2015-08-28 16:09:23 +0200519 while (i + Hasher::kHashTypeLength - 1 < i_end) {
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100520 int max_length = i_end - i;
Roderick Sheeter1cdcbd82013-11-19 14:32:56 -0800521 size_t max_distance = std::min(i + i_diff, max_backward_limit);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100522 int best_len = 0;
523 int best_len_code = 0;
524 int best_dist = 0;
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200525 double best_score = kMinScore;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200526 bool match_found = hasher->FindLongestMatch(
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100527 ringbuffer, ringbuffer_mask,
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100528 dist_cache, i + i_diff, max_length, max_distance,
529 &best_len, &best_len_code, &best_dist, &best_score);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200530 if (match_found) {
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200531 // Found a match. Let's look for something even better ahead.
532 int delayed_backward_references_in_row = 0;
533 for (;;) {
534 --max_length;
535 int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
536 int best_len_code_2 = 0;
537 int best_dist_2 = 0;
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200538 double best_score_2 = kMinScore;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200539 max_distance = std::min(i + i_diff + 1, max_backward_limit);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100540 hasher->Store(ringbuffer + i, i + i_diff);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200541 match_found = hasher->FindLongestMatch(
542 ringbuffer, ringbuffer_mask,
543 dist_cache, i + i_diff + 1, max_length, max_distance,
544 &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
545 double cost_diff_lazy = 7.0;
546 if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
547 // Ok, let's just write one byte for now and start a match from the
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100548 // next byte.
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200549 ++i;
550 ++insert_length;
551 best_len = best_len_2;
552 best_len_code = best_len_code_2;
553 best_dist = best_dist_2;
554 best_score = best_score_2;
555 if (++delayed_backward_references_in_row < 4) {
556 continue;
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100557 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200558 }
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200559 break;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200560 }
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100561 apply_random_heuristics =
562 i + 2 * best_len + random_heuristics_window_size;
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100563 max_distance = std::min(i + i_diff, max_backward_limit);
Zoltan Szabadka12eb9bf2015-05-07 17:40:00 +0200564 // The first 16 codes are special shortcodes, and the minimum offset is 1.
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200565 int distance_code =
566 ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
567 if (best_dist <= max_distance && distance_code > 0) {
568 dist_cache[3] = dist_cache[2];
569 dist_cache[2] = dist_cache[1];
570 dist_cache[1] = dist_cache[0];
571 dist_cache[0] = best_dist;
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100572 }
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100573 Command cmd(insert_length, best_len, best_len_code, distance_code);
574 *commands++ = cmd;
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200575 *num_literals += insert_length;
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100576 insert_length = 0;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200577 // Put the hash keys into the table, if there are enough
578 // bytes left.
579 for (int j = 1; j < best_len; ++j) {
580 hasher->Store(&ringbuffer[i + j], i + i_diff + j);
Zoltan Szabadka0454ab42014-02-14 15:04:23 +0100581 }
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200582 i += best_len;
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200583 } else {
584 ++insert_length;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100585 hasher->Store(ringbuffer + i, i + i_diff);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200586 ++i;
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100587 // If we have not seen matches for a long time, we can skip some
588 // match lookups. Unsuccessful match lookups are very very expensive
589 // and this kind of a heuristic speeds up compression quite
590 // a lot.
591 if (i > apply_random_heuristics) {
592 // Going through uncompressible data, jump.
593 if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
594 // It is quite a long time since we saw a copy, so we assume
595 // that this data is not compressible, and store hashes less
596 // often. Hashes of non compressible data are less likely to
597 // turn out to be useful in the future, too, so we store less of
598 // them to not to flood out the hash table of good compressible
599 // data.
600 int i_jump = std::min(i + 16, i_end - 4);
601 for (; i < i_jump; i += 4) {
602 hasher->Store(ringbuffer + i, i + i_diff);
603 insert_length += 4;
604 }
605 } else {
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100606 int i_jump = std::min(i + 8, i_end - 3);
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100607 for (; i < i_jump; i += 2) {
608 hasher->Store(ringbuffer + i, i + i_diff);
609 insert_length += 2;
610 }
611 }
612 }
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200613 }
614 }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100615 insert_length += (i_end - i);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100616 *last_insert_len = insert_length;
617 *num_commands += (commands - orig_commands);
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200618}
619
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100620void CreateBackwardReferences(size_t num_bytes,
621 size_t position,
622 const uint8_t* ringbuffer,
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100623 size_t ringbuffer_mask,
624 const size_t max_backward_limit,
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100625 const int quality,
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100626 Hashers* hashers,
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100627 int hash_type,
628 int* dist_cache,
629 int* last_insert_len,
630 Command* commands,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200631 int* num_commands,
632 int* num_literals) {
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200633 bool zopflify = quality > 9;
634 if (zopflify) {
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200635 Hashers::H9* hasher = hashers->hash_h9;
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200636 if (num_bytes >= 3 && position >= 3) {
637 // Prepare the hashes for three last bytes of the last write.
638 // These could not be calculated before, since they require knowledge
639 // of both the previous and the current block.
640 hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
641 position - 3);
642 hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
643 position - 2);
644 hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
645 position - 1);
646 }
647 std::vector<int> num_matches(num_bytes);
648 std::vector<BackwardMatch> matches(3 * num_bytes);
649 size_t cur_match_pos = 0;
650 for (size_t i = 0; i + 3 < num_bytes; ++i) {
651 size_t max_distance = std::min(position + i, max_backward_limit);
652 int max_length = num_bytes - i;
653 // Ensure that we have at least kMaxZopfliLen free slots.
654 if (matches.size() < cur_match_pos + kMaxZopfliLen) {
655 matches.resize(cur_match_pos + kMaxZopfliLen);
656 }
657 hasher->FindAllMatches(
658 ringbuffer, ringbuffer_mask,
659 position + i, max_length, max_distance,
660 &num_matches[i], &matches[cur_match_pos]);
661 hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
662 position + i);
663 cur_match_pos += num_matches[i];
664 if (num_matches[i] == 1) {
665 const int match_len = matches[cur_match_pos - 1].length();
666 if (match_len > kMaxZopfliLen) {
667 for (int j = 1; j < match_len; ++j) {
668 ++i;
669 hasher->Store(
670 &ringbuffer[(position + i) & ringbuffer_mask], position + i);
671 num_matches[i] = 0;
672 }
673 }
674 }
675 }
676 int orig_num_literals = *num_literals;
677 int orig_last_insert_len = *last_insert_len;
678 int orig_dist_cache[4] = {
679 dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
680 };
681 int orig_num_commands = *num_commands;
682 static const int kIterations = 2;
683 for (int i = 0; i < kIterations; i++) {
684 ZopfliCostModel model;
685 if (i == 0) {
686 model.SetFromLiteralCosts(num_bytes, position,
Zoltan Szabadka4c375662015-10-01 15:10:42 +0200687 ringbuffer, ringbuffer_mask);
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200688 } else {
689 model.SetFromCommands(num_bytes, position,
690 ringbuffer, ringbuffer_mask,
691 commands, *num_commands - orig_num_commands,
692 orig_last_insert_len);
693 }
694 *num_commands = orig_num_commands;
695 *num_literals = orig_num_literals;
696 *last_insert_len = orig_last_insert_len;
697 memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
698 ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200699 max_backward_limit, model, num_matches, matches, dist_cache,
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200700 last_insert_len, commands, num_commands, num_literals);
701 }
702 return;
703 }
704
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100705 switch (hash_type) {
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100706 case 1:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200707 CreateBackwardReferences<Hashers::H1>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200708 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200709 quality, hashers->hash_h1, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200710 commands, num_commands, num_literals);
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100711 break;
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100712 case 2:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200713 CreateBackwardReferences<Hashers::H2>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200714 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200715 quality, hashers->hash_h2, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200716 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100717 break;
718 case 3:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200719 CreateBackwardReferences<Hashers::H3>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200720 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200721 quality, hashers->hash_h3, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200722 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100723 break;
724 case 4:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200725 CreateBackwardReferences<Hashers::H4>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200726 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200727 quality, hashers->hash_h4, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200728 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100729 break;
730 case 5:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200731 CreateBackwardReferences<Hashers::H5>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200732 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200733 quality, hashers->hash_h5, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200734 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100735 break;
736 case 6:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200737 CreateBackwardReferences<Hashers::H6>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200738 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200739 quality, hashers->hash_h6, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200740 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100741 break;
742 case 7:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200743 CreateBackwardReferences<Hashers::H7>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200744 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200745 quality, hashers->hash_h7, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200746 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100747 break;
748 case 8:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200749 CreateBackwardReferences<Hashers::H8>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200750 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200751 quality, hashers->hash_h8, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200752 commands, num_commands, num_literals);
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100753 break;
754 case 9:
Zoltan Szabadkab3d37232015-06-12 16:25:41 +0200755 CreateBackwardReferences<Hashers::H9>(
Zoltan Szabadka618287b2015-06-12 16:50:49 +0200756 num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
Zoltan Szabadka4a7024d2015-10-01 12:08:14 +0200757 quality, hashers->hash_h9, dist_cache, last_insert_len,
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200758 commands, num_commands, num_literals);
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100759 break;
760 default:
761 break;
762 }
763}
764
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200765} // namespace brotli