Eugene Klyuchnikov | 771eb10 | 2015-11-27 11:27:11 +0100 | [diff] [blame] | 1 | /* Copyright 2010 Google Inc. All Rights Reserved. |
| 2 | |
Eugene Klyuchnikov | 24ffa78 | 2015-12-11 11:11:51 +0100 | [diff] [blame] | 3 | Distributed under MIT license. |
Eugene Klyuchnikov | 771eb10 | 2015-11-27 11:27:11 +0100 | [diff] [blame] | 4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 | */ |
| 6 | |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 7 | /* A (forgetful) hash table to the data seen by the compressor, to |
| 8 | help create backward references to previous data. */ |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 9 | |
| 10 | #ifndef BROTLI_ENC_HASH_H_ |
| 11 | #define BROTLI_ENC_HASH_H_ |
| 12 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 13 | #include <string.h> /* memcmp, memset */ |
Eugene Kliuchnikov | 0282918 | 2016-06-03 10:51:04 +0200 | [diff] [blame] | 14 | |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 15 | #include "../common/constants.h" |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 16 | #include "../common/dictionary.h" |
Eugene Kliuchnikov | da254cf | 2017-12-12 14:33:12 +0100 | [diff] [blame] | 17 | #include "../common/platform.h" |
Eugene Kliuchnikov | 8148001 | 2016-08-23 14:40:33 +0200 | [diff] [blame] | 18 | #include <brotli/types.h> |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 19 | #include "./encoder_dict.h" |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 20 | #include "./fast_log.h" |
| 21 | #include "./find_match_length.h" |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 22 | #include "./memory.h" |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 23 | #include "./quality.h" |
Zoltan Szabadka | e765008 | 2014-03-20 14:32:35 +0100 | [diff] [blame] | 24 | #include "./static_dict.h" |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 25 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 26 | #if defined(__cplusplus) || defined(c_plusplus) |
| 27 | extern "C" { |
| 28 | #endif |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 29 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 30 | /* Pointer to hasher data. |
| 31 | * |
| 32 | * Excluding initialization and destruction, hasher can be passed as |
| 33 | * HasherHandle by value. |
| 34 | * |
| 35 | * Typically hasher data consists of 3 sections: |
| 36 | * * HasherCommon structure |
| 37 | * * private structured hasher data, depending on hasher type |
| 38 | * * private dynamic hasher data, depending on hasher type and parameters |
| 39 | */ |
| 40 | typedef uint8_t* HasherHandle; |
| 41 | |
| 42 | typedef struct { |
| 43 | BrotliHasherParams params; |
| 44 | |
| 45 | /* False if hasher needs to be "prepared" before use. */ |
| 46 | BROTLI_BOOL is_prepared_; |
| 47 | |
| 48 | size_t dict_num_lookups; |
| 49 | size_t dict_num_matches; |
| 50 | } HasherCommon; |
| 51 | |
| 52 | static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) { |
| 53 | return (HasherCommon*)handle; |
| 54 | } |
| 55 | |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 56 | #define score_t size_t |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 57 | |
Zoltan Szabadka | 8844b7f | 2016-01-07 16:27:49 +0100 | [diff] [blame] | 58 | static const uint32_t kCutoffTransformsCount = 10; |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 59 | /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ |
| 60 | /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */ |
| 61 | static const uint64_t kCutoffTransforms = |
| 62 | BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200); |
Lode Vandevenne | 17ed258 | 2015-08-10 13:13:58 +0200 | [diff] [blame] | 63 | |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 64 | typedef struct HasherSearchResult { |
| 65 | size_t len; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 66 | size_t distance; |
| 67 | score_t score; |
Eugene Kliuchnikov | 5244106 | 2017-07-21 10:07:24 +0200 | [diff] [blame] | 68 | int len_code_delta; /* == len_code - len */ |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 69 | } HasherSearchResult; |
| 70 | |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 71 | /* kHashMul32 multiplier has these properties: |
| 72 | * The multiplier must be odd. Otherwise we may lose the highest bit. |
Eugene Kliuchnikov | e9b278a | 2016-10-31 14:33:59 +0100 | [diff] [blame] | 73 | * No long streaks of ones or zeros. |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 74 | * There is no effort to ensure that it is a prime, the oddity is enough |
| 75 | for this use. |
| 76 | * The number has been tuned heuristically against compression benchmarks. */ |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 77 | static const uint32_t kHashMul32 = 0x1E35A7BD; |
| 78 | static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 79 | static const uint64_t kHashMul64Long = |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 80 | BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 81 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 82 | static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { |
Eugene Kliuchnikov | da254cf | 2017-12-12 14:33:12 +0100 | [diff] [blame] | 83 | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 84 | /* The higher bits contain more mixture from the multiplication, |
| 85 | so we take our results from there. */ |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 86 | return h >> (32 - 14); |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 87 | } |
| 88 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 89 | static BROTLI_INLINE void PrepareDistanceCache( |
| 90 | int* BROTLI_RESTRICT distance_cache, const int num_distances) { |
| 91 | if (num_distances > 4) { |
| 92 | int last_distance = distance_cache[0]; |
| 93 | distance_cache[4] = last_distance - 1; |
| 94 | distance_cache[5] = last_distance + 1; |
| 95 | distance_cache[6] = last_distance - 2; |
| 96 | distance_cache[7] = last_distance + 2; |
| 97 | distance_cache[8] = last_distance - 3; |
| 98 | distance_cache[9] = last_distance + 3; |
| 99 | if (num_distances > 10) { |
| 100 | int next_last_distance = distance_cache[1]; |
| 101 | distance_cache[10] = next_last_distance - 1; |
| 102 | distance_cache[11] = next_last_distance + 1; |
| 103 | distance_cache[12] = next_last_distance - 2; |
| 104 | distance_cache[13] = next_last_distance + 2; |
| 105 | distance_cache[14] = next_last_distance - 3; |
| 106 | distance_cache[15] = next_last_distance + 3; |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | #define BROTLI_LITERAL_BYTE_SCORE 135 |
| 112 | #define BROTLI_DISTANCE_BIT_PENALTY 30 |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 113 | /* Score must be positive after applying maximal penalty. */ |
| 114 | #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) |
| 115 | |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 116 | /* Usually, we always choose the longest backward reference. This function |
| 117 | allows for the exception of that rule. |
| 118 | |
| 119 | If we choose a backward reference that is further away, it will |
| 120 | usually be coded with more bits. We approximate this by assuming |
| 121 | log2(distance). If the distance can be expressed in terms of the |
| 122 | last four distances, we use some heuristic constants to estimate |
| 123 | the bits cost. For the first up to four literals we use the bit |
| 124 | cost of the literals from the literal cost model, after that we |
| 125 | use the average bit cost of the cost model. |
| 126 | |
| 127 | This function is used to sometimes discard a longer backward reference |
| 128 | when it is not much longer and the bit cost for encoding it is more |
| 129 | than the saved literals. |
| 130 | |
| 131 | backward_reference_offset MUST be positive. */ |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 132 | static BROTLI_INLINE score_t BackwardReferenceScore( |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 133 | size_t copy_length, size_t backward_reference_offset) { |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 134 | return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - |
| 135 | BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); |
Zoltan Szabadka | e765008 | 2014-03-20 14:32:35 +0100 | [diff] [blame] | 136 | } |
| 137 | |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 138 | static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 139 | size_t copy_length) { |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 140 | return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 141 | BROTLI_SCORE_BASE + 15; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 142 | } |
| 143 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 144 | static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( |
| 145 | size_t distance_short_code) { |
| 146 | return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE); |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 147 | } |
| 148 | |
| 149 | static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 150 | const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data, |
| 151 | size_t max_length, size_t max_backward, size_t max_distance, |
| 152 | HasherSearchResult* out) { |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 153 | size_t len; |
| 154 | size_t dist; |
| 155 | size_t offset; |
| 156 | size_t matchlen; |
| 157 | size_t backward; |
| 158 | score_t score; |
Eugene Kliuchnikov | 8d3fdc1 | 2017-01-26 11:32:18 +0100 | [diff] [blame] | 159 | len = item & 0x1F; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 160 | dist = item >> 5; |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 161 | offset = dictionary->words->offsets_by_length[len] + len * dist; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 162 | if (len > max_length) { |
| 163 | return BROTLI_FALSE; |
| 164 | } |
| 165 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 166 | matchlen = |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 167 | FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); |
| 168 | if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 169 | return BROTLI_FALSE; |
| 170 | } |
| 171 | { |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 172 | size_t cut = len - matchlen; |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 173 | size_t transform_id = (cut << 2) + |
| 174 | (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 175 | backward = max_backward + dist + 1 + |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 176 | (transform_id << dictionary->words->size_bits_by_length[len]); |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 177 | } |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 178 | if (backward > max_distance) { |
Eugene Kliuchnikov | d63e8f7 | 2017-08-04 10:02:56 +0200 | [diff] [blame] | 179 | return BROTLI_FALSE; |
| 180 | } |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 181 | score = BackwardReferenceScore(matchlen, backward); |
| 182 | if (score < out->score) { |
| 183 | return BROTLI_FALSE; |
| 184 | } |
| 185 | out->len = matchlen; |
Eugene Kliuchnikov | 5244106 | 2017-07-21 10:07:24 +0200 | [diff] [blame] | 186 | out->len_code_delta = (int)len - (int)matchlen; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 187 | out->distance = backward; |
| 188 | out->score = score; |
| 189 | return BROTLI_TRUE; |
| 190 | } |
| 191 | |
Eugene Kliuchnikov | 5244106 | 2017-07-21 10:07:24 +0200 | [diff] [blame] | 192 | static BROTLI_INLINE void SearchInStaticDictionary( |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 193 | const BrotliEncoderDictionary* dictionary, |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 194 | HasherHandle handle, const uint8_t* data, size_t max_length, |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 195 | size_t max_backward, size_t max_distance, |
| 196 | HasherSearchResult* out, BROTLI_BOOL shallow) { |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 197 | size_t key; |
| 198 | size_t i; |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 199 | HasherCommon* self = GetHasherCommon(handle); |
| 200 | if (self->dict_num_matches < (self->dict_num_lookups >> 7)) { |
Eugene Kliuchnikov | 5244106 | 2017-07-21 10:07:24 +0200 | [diff] [blame] | 201 | return; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 202 | } |
| 203 | key = Hash14(data) << 1; |
Eugene Kliuchnikov | 0a63f99 | 2016-09-21 17:20:36 +0200 | [diff] [blame] | 204 | for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 205 | size_t item = dictionary->hash_table[key]; |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 206 | self->dict_num_lookups++; |
| 207 | if (item != 0) { |
| 208 | BROTLI_BOOL item_matches = TestStaticDictionaryItem( |
Eugene Kliuchnikov | 35e69fc | 2018-02-26 09:04:36 -0500 | [diff] [blame^] | 209 | dictionary, item, data, max_length, max_backward, max_distance, out); |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 210 | if (item_matches) { |
| 211 | self->dict_num_matches++; |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 212 | } |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 213 | } |
| 214 | } |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 215 | } |
| 216 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 217 | typedef struct BackwardMatch { |
Zoltan Szabadka | 8844b7f | 2016-01-07 16:27:49 +0100 | [diff] [blame] | 218 | uint32_t distance; |
| 219 | uint32_t length_and_code; |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 220 | } BackwardMatch; |
Zoltan Szabadka | b3d3723 | 2015-06-12 16:25:41 +0200 | [diff] [blame] | 221 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 222 | static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, |
| 223 | size_t dist, size_t len) { |
| 224 | self->distance = (uint32_t)dist; |
| 225 | self->length_and_code = (uint32_t)(len << 5); |
| 226 | } |
Zoltan Szabadka | b4f39bf | 2014-10-28 13:25:22 +0100 | [diff] [blame] | 227 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 228 | static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, |
| 229 | size_t dist, size_t len, size_t len_code) { |
| 230 | self->distance = (uint32_t)dist; |
| 231 | self->length_and_code = |
| 232 | (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); |
| 233 | } |
| 234 | |
| 235 | static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { |
| 236 | return self->length_and_code >> 5; |
| 237 | } |
| 238 | |
| 239 | static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { |
| 240 | size_t code = self->length_and_code & 31; |
| 241 | return code ? code : BackwardMatchLength(self); |
| 242 | } |
| 243 | |
| 244 | #define EXPAND_CAT(a, b) CAT(a, b) |
| 245 | #define CAT(a, b) a ## b |
| 246 | #define FN(X) EXPAND_CAT(X, HASHER()) |
| 247 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 248 | #define HASHER() H10 |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 249 | #define BUCKET_BITS 17 |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 250 | #define MAX_TREE_SEARCH_DEPTH 64 |
| 251 | #define MAX_TREE_COMP_LENGTH 128 |
| 252 | #include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */ |
| 253 | #undef MAX_TREE_SEARCH_DEPTH |
| 254 | #undef MAX_TREE_COMP_LENGTH |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 255 | #undef BUCKET_BITS |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 256 | #undef HASHER |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 257 | /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ |
| 258 | #define MAX_NUM_MATCHES_H10 128 |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 259 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 260 | /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression |
| 261 | a little faster (0.5% - 1%) and it compresses 0.15% better on small text |
Eugene Kliuchnikov | e9b278a | 2016-10-31 14:33:59 +0100 | [diff] [blame] | 262 | and HTML inputs. */ |
Zoltan Szabadka | b820c39 | 2016-03-15 10:50:16 +0100 | [diff] [blame] | 263 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 264 | #define HASHER() H2 |
| 265 | #define BUCKET_BITS 16 |
| 266 | #define BUCKET_SWEEP 1 |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 267 | #define HASH_LEN 5 |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 268 | #define USE_DICTIONARY 1 |
| 269 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
| 270 | #undef BUCKET_SWEEP |
| 271 | #undef USE_DICTIONARY |
| 272 | #undef HASHER |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 273 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 274 | #define HASHER() H3 |
| 275 | #define BUCKET_SWEEP 2 |
| 276 | #define USE_DICTIONARY 0 |
| 277 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
| 278 | #undef USE_DICTIONARY |
| 279 | #undef BUCKET_SWEEP |
| 280 | #undef BUCKET_BITS |
| 281 | #undef HASHER |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 282 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 283 | #define HASHER() H4 |
| 284 | #define BUCKET_BITS 17 |
| 285 | #define BUCKET_SWEEP 4 |
| 286 | #define USE_DICTIONARY 1 |
| 287 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
| 288 | #undef USE_DICTIONARY |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 289 | #undef HASH_LEN |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 290 | #undef BUCKET_SWEEP |
| 291 | #undef BUCKET_BITS |
| 292 | #undef HASHER |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 293 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 294 | #define HASHER() H5 |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 295 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 296 | #undef HASHER |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 297 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 298 | #define HASHER() H6 |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 299 | #include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */ |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 300 | #undef HASHER |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 301 | |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 302 | #define BUCKET_BITS 15 |
| 303 | |
| 304 | #define NUM_LAST_DISTANCES_TO_CHECK 4 |
| 305 | #define NUM_BANKS 1 |
| 306 | #define BANK_BITS 16 |
| 307 | #define HASHER() H40 |
| 308 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
| 309 | #undef HASHER |
| 310 | #undef NUM_LAST_DISTANCES_TO_CHECK |
| 311 | |
| 312 | #define NUM_LAST_DISTANCES_TO_CHECK 10 |
| 313 | #define HASHER() H41 |
| 314 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
| 315 | #undef HASHER |
| 316 | #undef NUM_LAST_DISTANCES_TO_CHECK |
| 317 | #undef NUM_BANKS |
| 318 | #undef BANK_BITS |
| 319 | |
| 320 | #define NUM_LAST_DISTANCES_TO_CHECK 16 |
| 321 | #define NUM_BANKS 512 |
| 322 | #define BANK_BITS 9 |
| 323 | #define HASHER() H42 |
| 324 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
| 325 | #undef HASHER |
| 326 | #undef NUM_LAST_DISTANCES_TO_CHECK |
| 327 | #undef NUM_BANKS |
| 328 | #undef BANK_BITS |
| 329 | |
| 330 | #undef BUCKET_BITS |
| 331 | |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 332 | #define HASHER() H54 |
| 333 | #define BUCKET_BITS 20 |
| 334 | #define BUCKET_SWEEP 4 |
| 335 | #define HASH_LEN 7 |
| 336 | #define USE_DICTIONARY 0 |
| 337 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
| 338 | #undef USE_DICTIONARY |
| 339 | #undef HASH_LEN |
| 340 | #undef BUCKET_SWEEP |
| 341 | #undef BUCKET_BITS |
| 342 | #undef HASHER |
| 343 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 344 | #undef FN |
| 345 | #undef CAT |
| 346 | #undef EXPAND_CAT |
Zoltan Szabadka | dbb53e6 | 2016-01-27 09:50:39 +0100 | [diff] [blame] | 347 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 348 | #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 349 | #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) |
| 350 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 351 | static BROTLI_INLINE void DestroyHasher( |
| 352 | MemoryManager* m, HasherHandle* handle) { |
| 353 | if (*handle == NULL) return; |
| 354 | BROTLI_FREE(m, *handle); |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 355 | } |
| 356 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 357 | static BROTLI_INLINE void HasherReset(HasherHandle handle) { |
| 358 | if (handle == NULL) return; |
| 359 | GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 360 | } |
| 361 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 362 | static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params, |
| 363 | BROTLI_BOOL one_shot, const size_t input_size) { |
| 364 | size_t result = sizeof(HasherCommon); |
| 365 | switch (params->hasher.type) { |
| 366 | #define SIZE_(N) \ |
| 367 | case N: \ |
| 368 | result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \ |
| 369 | break; |
| 370 | FOR_ALL_HASHERS(SIZE_) |
| 371 | #undef SIZE_ |
| 372 | default: |
| 373 | break; |
Eugene Kliuchnikov | 2048189 | 2016-07-26 14:41:59 +0200 | [diff] [blame] | 374 | } |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 375 | return result; |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 376 | } |
| 377 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 378 | static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle, |
| 379 | BrotliEncoderParams* params, const uint8_t* data, size_t position, |
| 380 | size_t input_size, BROTLI_BOOL is_last) { |
| 381 | HasherHandle self = NULL; |
| 382 | HasherCommon* common = NULL; |
| 383 | BROTLI_BOOL one_shot = (position == 0 && is_last); |
| 384 | if (*handle == NULL) { |
| 385 | size_t alloc_size; |
| 386 | ChooseHasher(params, ¶ms->hasher); |
| 387 | alloc_size = HasherSize(params, one_shot, input_size); |
| 388 | self = BROTLI_ALLOC(m, uint8_t, alloc_size); |
| 389 | if (BROTLI_IS_OOM(m)) return; |
| 390 | *handle = self; |
| 391 | common = GetHasherCommon(self); |
| 392 | common->params = params->hasher; |
| 393 | switch (common->params.type) { |
| 394 | #define INITIALIZE_(N) \ |
| 395 | case N: \ |
| 396 | InitializeH ## N(*handle, params); \ |
| 397 | break; |
| 398 | FOR_ALL_HASHERS(INITIALIZE_); |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 399 | #undef INITIALIZE_ |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 400 | default: |
| 401 | break; |
| 402 | } |
| 403 | HasherReset(*handle); |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 404 | } |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 405 | |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 406 | self = *handle; |
| 407 | common = GetHasherCommon(self); |
| 408 | if (!common->is_prepared_) { |
| 409 | switch (common->params.type) { |
| 410 | #define PREPARE_(N) \ |
| 411 | case N: \ |
| 412 | PrepareH ## N(self, one_shot, input_size, data); \ |
| 413 | break; |
| 414 | FOR_ALL_HASHERS(PREPARE_) |
| 415 | #undef PREPARE_ |
| 416 | default: break; |
| 417 | } |
| 418 | if (position == 0) { |
| 419 | common->dict_num_lookups = 0; |
| 420 | common->dict_num_matches = 0; |
| 421 | } |
| 422 | common->is_prepared_ = BROTLI_TRUE; |
| 423 | } |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 424 | } |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 425 | |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 426 | static BROTLI_INLINE void InitOrStitchToPreviousBlock( |
Eugene Kliuchnikov | cdca91b | 2017-03-06 14:22:45 +0100 | [diff] [blame] | 427 | MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask, |
| 428 | BrotliEncoderParams* params, size_t position, size_t input_size, |
| 429 | BROTLI_BOOL is_last) { |
| 430 | HasherHandle self; |
| 431 | HasherSetup(m, handle, params, data, position, input_size, is_last); |
| 432 | if (BROTLI_IS_OOM(m)) return; |
| 433 | self = *handle; |
| 434 | switch (GetHasherCommon(self)->params.type) { |
| 435 | #define INIT_(N) \ |
| 436 | case N: \ |
| 437 | StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \ |
Eugene Kliuchnikov | 11df843 | 2017-02-06 14:20:43 +0100 | [diff] [blame] | 438 | break; |
| 439 | FOR_ALL_HASHERS(INIT_) |
| 440 | #undef INIT_ |
| 441 | default: break; |
| 442 | } |
| 443 | } |
| 444 | |
Eugene Kliuchnikov | b972c67 | 2016-06-13 11:01:04 +0200 | [diff] [blame] | 445 | #if defined(__cplusplus) || defined(c_plusplus) |
| 446 | } /* extern "C" */ |
| 447 | #endif |
Zoltan Szabadka | c66e4e3 | 2013-10-23 13:06:13 +0200 | [diff] [blame] | 448 | |
Eugene Kliuchnikov | 352b0b2 | 2016-06-03 11:19:23 +0200 | [diff] [blame] | 449 | #endif /* BROTLI_ENC_HASH_H_ */ |