Updates to Brotli compression format, decoder and encoder

This commit contains a batch of changes that were made to the Brotli
compression algorithm in the last month. Most important changes:

   * Format change: don't push distances representing static dictionary words to the distance cache.
   * Fix decoder invalid memory access bug caused by building a non-complete Huffman tree.
   * Add a mode parameter to the encoder interface.
   * Use different hashers for text and font mode.
   * Add a heuristics to the hasher for skipping non-compressible data.
   * Exhaustive search of static dictionary during backward reference search.
diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc
index fa04834..4ae96bc 100644
--- a/brotli/enc/encode.cc
+++ b/brotli/enc/encode.cc
@@ -168,15 +168,6 @@
   }
 }
 
-template<int kSize>
-void EntropyEncode(int val, const EntropyCode<kSize>& code,
-                   int* storage_ix, uint8_t* storage) {
-  if (code.count_ <= 1) {
-    return;
-  };
-  WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
-}
-
 void StoreHuffmanTreeOfHuffmanTreeToBitMask(
     const uint8_t* code_length_bitdepth,
     int* storage_ix, uint8_t* storage) {
@@ -225,7 +216,9 @@
   for (int i = 0; i < huffman_tree_size; ++i) {
     const int ix = huffman_tree[i];
     const int extra_bits = huffman_tree_extra_bits[i];
-    EntropyEncode(ix, entropy, storage_ix, storage);
+    if (entropy.count_ > 1) {
+      WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
+    }
     switch (ix) {
       case 16:
         WriteBits(2, extra_bits, storage_ix, storage);
@@ -240,8 +233,7 @@
 template<int kSize>
 void StoreHuffmanCodeSimple(
     const EntropyCode<kSize>& code, int alphabet_size,
-    int max_bits,
-    int* storage_ix, uint8_t* storage) {
+    int max_bits, int* storage_ix, uint8_t* storage) {
   const uint8_t *depth = &code.depth_[0];
   int symbols[4];
   // Quadratic sort.
@@ -304,37 +296,66 @@
                             storage_ix, storage);
 }
 
-
 template<int kSize>
-void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
-                      int* storage_ix, uint8_t* storage) {
+void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
+                              const int tree_limit,
+                              const int alphabet_size,
+                              EntropyCode<kSize>* code,
+                              int* storage_ix, uint8_t* storage) {
+  memset(code->depth_, 0, sizeof(code->depth_));
+  memset(code->bits_, 0, sizeof(code->bits_));
+  memset(code->symbols_, 0, sizeof(code->symbols_));
+  code->count_ = 0;
+
   int max_bits_counter = alphabet_size - 1;
   int max_bits = 0;
   while (max_bits_counter) {
     max_bits_counter >>= 1;
     ++max_bits;
   }
-  if (code.count_ == 0) {
-    // Emit a minimal tree for empty cases.
-    // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
-    WriteBits(4 + max_bits, 0x1, storage_ix, storage);
-  } else if (code.count_ <= 4) {
-    StoreHuffmanCodeSimple(
-        code, alphabet_size, max_bits,
-        storage_ix, storage);
+
+  for (size_t i = 0; i < alphabet_size; i++) {
+    if (histogram.data_[i] > 0) {
+      if (code->count_ < 4) code->symbols_[code->count_] = i;
+      ++code->count_;
+    }
+  }
+
+  if (code->count_ <= 1) {
+    WriteBits(2, 1, storage_ix, storage);
+    WriteBits(2, 0, storage_ix, storage);
+    WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
+    return;
+  }
+
+  if (alphabet_size >= 50 && code->count_ >= 16) {
+    std::vector<int> counts(alphabet_size);
+    memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
+    OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
+    CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
   } else {
-    StoreHuffmanCodeComplex(
-        code, alphabet_size,
-        storage_ix, storage);
+    CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
+  }
+  ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
+
+  if (code->count_ <= 4) {
+    StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
+  } else {
+    StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
   }
 }
 
 template<int kSize>
-void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
-                       int alphabet_size,
-                       int* storage_ix, uint8_t* storage) {
-  for (int i = 0; i < codes.size(); ++i) {
-    StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
+void BuildAndStoreEntropyCodes(
+    const std::vector<Histogram<kSize> >& histograms,
+    int alphabet_size,
+    std::vector<EntropyCode<kSize> >* entropy_codes,
+    int* storage_ix, uint8_t* storage) {
+  entropy_codes->resize(histograms.size());
+  for (int i = 0; i < histograms.size(); ++i) {
+    BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
+                             &(*entropy_codes)[i],
+                             storage_ix, storage);
   }
 }
 
@@ -342,7 +363,7 @@
                    const EntropyCodeCommand& entropy,
                    int* storage_ix, uint8_t* storage) {
   int code = cmd.command_prefix_;
-  EntropyEncode(code, entropy, storage_ix, storage);
+  WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
   if (code >= 128) {
     code -= 128;
   }
@@ -364,13 +385,15 @@
   int code = cmd.distance_prefix_;
   int extra_bits = cmd.distance_extra_bits_;
   uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
-  EntropyEncode(code, entropy, storage_ix, storage);
+  WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
   if (extra_bits > 0) {
     WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
   }
 }
 
 void ComputeDistanceShortCodes(std::vector<Command>* cmds,
+                               size_t pos,
+                               const size_t max_backward,
                                int* dist_ringbuffer,
                                size_t* ringbuffer_idx) {
   static const int kIndexOffset[16] = {
@@ -380,30 +403,40 @@
     0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
   };
   for (int i = 0; i < cmds->size(); ++i) {
+    pos += (*cmds)[i].insert_length_;
+    size_t max_distance = std::min(pos, max_backward);
     int cur_dist = (*cmds)[i].copy_distance_;
-    if (cur_dist == 0) break;
     int dist_code = cur_dist + 16;
-    int limits[16] = { 0, 4, 10, 11,
-                       6, 6, 11, 11,
-                       11, 11, 11, 11,
-                       12, 12, 12, 12 };
-    for (int k = 0; k < 16; ++k) {
-      // Only accept more popular choices.
-      if (cur_dist < limits[k]) {
-        // Typically unpopular ranges, don't replace a short distance
-        // with them.
-        continue;
+    if (cur_dist <= max_distance) {
+      if (cur_dist == 0) break;
+      int limits[16] = { 0, 0, 0, 0,
+                         6, 6, 11, 11,
+                         11, 11, 11, 11,
+                         12, 12, 12, 12 };
+      for (int k = 0; k < 16; ++k) {
+        // Only accept more popular choices.
+        if (cur_dist < limits[k]) {
+          // Typically unpopular ranges, don't replace a short distance
+          // with them.
+          continue;
+        }
+        int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
+                    kValueOffset[k]);
+        if (cur_dist == comp) {
+          dist_code = k + 1;
+          break;
+        }
       }
-      int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
-                  kValueOffset[k]);
-      if (cur_dist == comp) {
-        dist_code = k + 1;
-        break;
+      if (dist_code > 1) {
+        dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
+        ++(*ringbuffer_idx);
       }
-    }
-    if (dist_code > 1) {
-      dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
-      ++(*ringbuffer_idx);
+      pos += (*cmds)[i].copy_length_;
+    } else {
+      int word_idx = cur_dist - max_distance - 1;
+      const std::string word =
+          GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
+      pos += word.size();
     }
     (*cmds)[i].distance_code_ = dist_code;
   }
@@ -558,18 +591,20 @@
   for (int i = 0; i < rle_symbols.size(); ++i) {
     symbol_histogram.Add(rle_symbols[i]);
   }
-  EntropyCodeContextMap symbol_code;
-  BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
-                   &symbol_code);
   bool use_rle = max_run_length_prefix > 0;
   WriteBits(1, use_rle, storage_ix, storage);
   if (use_rle) {
     WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
   }
-  StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
-                   storage_ix, storage);
+  EntropyCodeContextMap symbol_code;
+  BuildAndStoreEntropyCode(symbol_histogram, 15,
+                           num_clusters + max_run_length_prefix,
+                           &symbol_code,
+                           storage_ix, storage);
   for (int i = 0; i < rle_symbols.size(); ++i) {
-    EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
+    WriteBits(symbol_code.depth_[rle_symbols[i]],
+              symbol_code.bits_[rle_symbols[i]],
+              storage_ix, storage);
     if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
       WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
     }
@@ -577,16 +612,6 @@
   WriteBits(1, 1, storage_ix, storage);  // use move-to-front
 }
 
-template<int kSize>
-void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
-                       int alphabet_size,
-                       std::vector<EntropyCode<kSize> >* entropy_codes) {
-  entropy_codes->resize(histograms.size());
-  for (int i = 0; i < histograms.size(); ++i) {
-    BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
-  }
-}
-
 struct BlockSplitCode {
   EntropyCodeBlockType block_type_code;
   EntropyCodeBlockLength block_len_code;
@@ -598,8 +623,8 @@
   int len_code = BlockLengthPrefix(length);
   int extra_bits = BlockLengthExtraBits(len_code);
   int extra_bits_value = length - BlockLengthOffset(len_code);
-  EntropyEncode(len_code, entropy, storage_ix, storage);
-
+  WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
+            storage_ix, storage);
   if (extra_bits > 0) {
     WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
   }
@@ -632,26 +657,25 @@
                                   BlockSplitCode* code,
                                   int* storage_ix, uint8_t* storage) {
   EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
+
   if (split.num_types_ == 1) {
     return;
   }
 
   HistogramBlockType type_histo;
-  for (int i = 0; i < split.type_codes_.size(); ++i) {
+  for (int i = 1; i < split.type_codes_.size(); ++i) {
     type_histo.Add(split.type_codes_[i]);
   }
-  BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
-                   &code->block_type_code);
   HistogramBlockLength length_histo;
   for (int i = 0; i < split.lengths_.size(); ++i) {
     length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
   }
-  BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
-                   &code->block_len_code);
-  StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
-                   storage_ix, storage);
-  StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
-                   storage_ix, storage);
+  BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
+                           &code->block_type_code,
+                           storage_ix, storage);
+  BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
+                           &code->block_len_code,
+                           storage_ix, storage);
   EncodeBlockLength(code->block_len_code, split.lengths_[0],
                     storage_ix, storage);
 }
@@ -664,7 +688,9 @@
     it->type_ = it->split_.types_[it->idx_];
     it->length_ = it->split_.lengths_[it->idx_];
     int type_code = it->split_.type_codes_[it->idx_];
-    EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
+    WriteBits(code.block_type_code.depth_[type_code],
+              code.block_type_code.bits_[type_code],
+              storage_ix, storage);
     EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
   }
   --it->length_;
@@ -773,10 +799,8 @@
                     int* storage_ix, uint8_t* storage) {
   size_t length = MetaBlockLength(mb.cmds);
   const size_t end_pos = *pos + length;
-  EncodeMetaBlockLength(length,
-                        is_last,
-                        false,
-                        storage_ix, storage);
+  EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
+
   if (length == 0) {
     return;
   }
@@ -792,26 +816,27 @@
   WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
   WriteBits(4,
             mb.params.num_direct_distance_codes >>
-            mb.params.distance_postfix_bits, storage_ix, storage);
+            mb.params.distance_postfix_bits,
+            storage_ix, storage);
   int num_distance_codes =
       kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
       (48 << mb.params.distance_postfix_bits);
   for (int i = 0; i < mb.literal_split.num_types_; ++i) {
     WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
   }
-  EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
-  EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
+  EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
+                   storage_ix, storage);
+  EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
+                   storage_ix, storage);
   std::vector<EntropyCodeLiteral> literal_codes;
   std::vector<EntropyCodeCommand> command_codes;
   std::vector<EntropyCodeDistance> distance_codes;
-  BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
-  BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
-                    &command_codes);
-  BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
-                    &distance_codes);
-  StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
-  StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
-  StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
+  BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
+                            storage_ix, storage);
+  BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+                            &command_codes, storage_ix, storage);
+  BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
+                            &distance_codes, storage_ix, storage);
   BlockSplitIterator literal_it(mb.literal_split);
   BlockSplitIterator command_it(mb.command_split);
   BlockSplitIterator distance_it(mb.distance_split);
@@ -828,8 +853,10 @@
                      Context(prev_byte, prev_byte2,
                              mb.literal_context_modes[literal_it.type_]));
       histogram_idx = mb.literal_context_map[context];
-      EntropyEncode(ringbuffer[*pos & mask],
-                    literal_codes[histogram_idx], storage_ix, storage);
+      int literal = ringbuffer[*pos & mask];
+      WriteBits(literal_codes[histogram_idx].depth_[literal],
+                literal_codes[histogram_idx].bits_[literal],
+                storage_ix, storage);
       ++(*pos);
     }
     if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
@@ -845,9 +872,10 @@
   }
 }
 
-BrotliCompressor::BrotliCompressor()
-    : window_bits_(kWindowBits),
-      hasher_(new Hasher),
+BrotliCompressor::BrotliCompressor(BrotliParams params)
+    : params_(params),
+      window_bits_(kWindowBits),
+      hashers_(new Hashers()),
       dist_ringbuffer_idx_(0),
       input_pos_(0),
       ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
@@ -859,28 +887,41 @@
   dist_ringbuffer_[2] = 11;
   dist_ringbuffer_[3] = 4;
   storage_[0] = 0;
-  StoreDictionaryWordHashes();
+  switch (params.mode) {
+    case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
+    case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
+    default: break;
+  }
+  hashers_->Init(hash_type_);
+  if (params.mode == BrotliParams::MODE_TEXT) {
+    StoreDictionaryWordHashes();
+  }
 }
 
 BrotliCompressor::~BrotliCompressor() {
-  delete hasher_;
   delete[] storage_;
 }
 
+StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
+
 void BrotliCompressor::StoreDictionaryWordHashes() {
-  for (int t = kNumTransforms - 1; t >= 0; --t) {
-    for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
-      const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
-      for (int j = num_words - 1; j >= 0; --j) {
-        int word_id = t * num_words + j;
-        std::string word = GetTransformedDictionaryWord(i, word_id);
-        if (word.size() >= 3) {
-          hasher_->Store(reinterpret_cast<const uint8_t*>(&word[0]),
-                         (-1) * ((i << 20) + word_id + 1));
+  const int num_transforms = kNumTransforms;
+  if (static_dictionary_ == NULL) {
+    static_dictionary_ = new StaticDictionary;
+    for (int t = num_transforms - 1; t >= 0; --t) {
+      for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
+        const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
+        for (int j = num_words - 1; j >= 0; --j) {
+          int word_id = t * num_words + j;
+          std::string word = GetTransformedDictionaryWord(i, word_id);
+          if (word.size() >= 3) {
+            static_dictionary_->Insert(word, i, word_id);
+          }
         }
       }
     }
   }
+  hashers_->SetStaticDictionary(static_dictionary_);
 }
 
 void BrotliCompressor::WriteStreamHeader() {
@@ -908,25 +949,30 @@
       input_size, kMinUTF8Ratio);
     if (utf8_mode) {
       EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
-                                      kRingBufferMask, ringbuffer_.start(),
-                                      &literal_cost_[0]);
+                                      kRingBufferMask, kRingBufferMask,
+                                      ringbuffer_.start(), &literal_cost_[0]);
     } else {
       EstimateBitCostsForLiterals(input_pos_, input_size,
-                                  kRingBufferMask, ringbuffer_.start(),
-                                  &literal_cost_[0]);
+                                  kRingBufferMask, kRingBufferMask,
+                                  ringbuffer_.start(), &literal_cost_[0]);
     }
-    CreateBackwardReferences(input_size, input_pos_,
-                             ringbuffer_.start(),
-                             &literal_cost_[0],
-                             kRingBufferMask, kMaxBackwardDistance,
-                             hasher_,
-                             &commands);
-    ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
+    CreateBackwardReferences(
+        input_size, input_pos_,
+        ringbuffer_.start(),
+        &literal_cost_[0],
+        kRingBufferMask, kMaxBackwardDistance,
+        hashers_.get(),
+        hash_type_,
+        &commands);
+    ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
+                              dist_ringbuffer_,
                               &dist_ringbuffer_idx_);
   }
   EncodingParams params;
-  params.num_direct_distance_codes = 12;
-  params.distance_postfix_bits = 1;
+  params.num_direct_distance_codes =
+      params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
+  params.distance_postfix_bits =
+      params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
   params.literal_context_mode = CONTEXT_SIGNED;
   const int storage_ix0 = storage_ix_;
   MetaBlock mb;
@@ -935,6 +981,7 @@
   StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
                  &input_pos_, &storage_ix_, storage_);
   size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
+  output_size -= (storage_ix0 >> 3);
   if (input_size + 4 < output_size) {
     storage_ix_ = storage_ix0;
     storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
@@ -968,7 +1015,8 @@
 }
 
 
-int BrotliCompressBuffer(size_t input_size,
+int BrotliCompressBuffer(BrotliParams params,
+                         size_t input_size,
                          const uint8_t* input_buffer,
                          size_t* encoded_size,
                          uint8_t* encoded_buffer) {
@@ -978,7 +1026,7 @@
     return 1;
   }
 
-  BrotliCompressor compressor;
+  BrotliCompressor compressor(params);
   compressor.WriteStreamHeader();
 
   const int max_block_size = 1 << kMetaBlockSizeBits;