Merge "Revert "Optimize register mask and stack mask in stack maps.""
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index b40ea37..c6e375a 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -48,6 +48,10 @@
         ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
     current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
   }
+  if (sp_mask != nullptr) {
+    stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
+  }
+
   current_dex_register_ = 0;
 }
 
@@ -213,32 +217,11 @@
   PrepareMethodIndices();
 
   // Dedup stack masks. Needs to be done first as it modifies the stack map entry.
-  BitmapTableBuilder stack_mask_builder(allocator_);
-  for (StackMapEntry& stack_map : stack_maps_) {
-    BitVector* mask = stack_map.sp_mask;
-    size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
-    if (num_bits != 0) {
-      stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
-    } else {
-      stack_map.stack_mask_index = StackMap::kNoValue;
-    }
-  }
+  size_t stack_mask_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
 
   // Dedup register masks. Needs to be done first as it modifies the stack map entry.
-  BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
-  for (StackMapEntry& stack_map : stack_maps_) {
-    uint32_t register_mask = stack_map.register_mask;
-    if (register_mask != 0) {
-      uint32_t shift = LeastSignificantBit(register_mask);
-      std::array<uint32_t, RegisterMask::kCount> entry = {
-        register_mask >> shift,
-        shift,
-      };
-      stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
-    } else {
-      stack_map.register_mask_index = StackMap::kNoValue;
-    }
-  }
+  size_t num_register_masks = PrepareRegisterMasks();
 
   // Write dex register maps.
   MemoryRegion dex_register_map_region =
@@ -318,8 +301,31 @@
   stack_map_builder.Encode(&out_, &bit_offset);
   invoke_info_builder.Encode(&out_, &bit_offset);
   inline_info_builder.Encode(&out_, &bit_offset);
+
+  // Write register masks table.
+  BitTableBuilder<uint32_t> register_mask_builder(allocator_);
+  for (size_t i = 0; i < num_register_masks; ++i) {
+    register_mask_builder.Add(register_masks_[i]);
+  }
   register_mask_builder.Encode(&out_, &bit_offset);
-  stack_mask_builder.Encode(&out_, &bit_offset);
+
+  // Write stack masks table.
+  EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
+  out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
+  BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
+                                    bit_offset,
+                                    stack_mask_bits * num_stack_masks);
+  if (stack_mask_bits > 0) {
+    for (size_t i = 0; i < num_stack_masks; ++i) {
+      size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
+      BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
+      BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
+      for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
+        size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
+        dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
+      }
+    }
+  }
 
   return UnsignedLeb128Size(out_.size()) +  out_.size();
 }
@@ -442,6 +448,17 @@
   }
 }
 
+size_t StackMapStream::PrepareRegisterMasks() {
+  register_masks_.resize(stack_maps_.size(), 0u);
+  ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
+  for (StackMapEntry& stack_map : stack_maps_) {
+    const size_t index = dedupe.size();
+    stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
+    register_masks_[index] = stack_map.register_mask;
+  }
+  return dedupe.size();
+}
+
 void StackMapStream::PrepareMethodIndices() {
   CHECK(method_indices_.empty());
   method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -464,10 +481,35 @@
   method_indices_.resize(dedupe.size());
 }
 
+
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+  // Preallocate memory since we do not want it to move (the dedup map will point into it).
+  const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+  stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+  // To keep deduplication simple, each stack mask is stored byte-aligned here; the masks are
+  // bit-packed later, when they are copied out of stack_masks_.
+  ScopedArenaUnorderedMap<MemoryRegion,
+                          size_t,
+                          FNVHash<MemoryRegion>,
+                          MemoryRegion::ContentEquals> dedup(
+                              stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
+  for (StackMapEntry& stack_map : stack_maps_) {
+    size_t index = dedup.size();
+    MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+    BitMemoryRegion stack_mask_bits(stack_mask);
+    for (size_t i = 0; i < entry_size_in_bits; i++) {
+      stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+    }
+    stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+  }
+  return dedup.size();
+}
+
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
   DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+  DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
   DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
   size_t invoke_info_index = 0;
   for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -480,15 +522,18 @@
     DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
     DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
     DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
+    const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
     DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
     BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
     if (entry.sp_mask != nullptr) {
       DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
-      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
       }
     } else {
-      DCHECK_EQ(stack_mask.size_in_bits(), 0u);
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
+      }
     }
     if (entry.dex_method_index != dex::kDexNoIndex) {
       InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 19863d8..ea97cf6 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,8 +68,11 @@
         location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+        register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_mask_max_(-1),
         out_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
                                            allocator->Adapter(kArenaAllocStackMapStream)),
@@ -168,6 +171,12 @@
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
 
+  // Returns the number of unique stack masks.
+  size_t PrepareStackMasks(size_t entry_size_in_bits);
+
+  // Returns the number of unique register masks.
+  size_t PrepareRegisterMasks();
+
   // Prepare and deduplicate method indices.
   void PrepareMethodIndices();
 
@@ -208,8 +217,11 @@
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
   ScopedArenaVector<size_t> dex_register_locations_;
   ScopedArenaVector<InlineInfoEntry> inline_infos_;
+  ScopedArenaVector<uint8_t> stack_masks_;
+  ScopedArenaVector<uint32_t> register_masks_;
   ScopedArenaVector<uint32_t> method_indices_;
   ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
+  int stack_mask_max_;
 
   ScopedArenaVector<uint8_t> out_;
 
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index c372bb9..9db7588 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -32,10 +32,10 @@
     const StackMap& stack_map,
     const BitVector& bit_vector) {
   BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
-  if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
+  if (bit_vector.GetNumberOfBits() > code_info.GetNumberOfStackMaskBits()) {
     return false;
   }
-  for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+  for (size_t i = 0; i < code_info.GetNumberOfStackMaskBits(); ++i) {
     if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
       return false;
     }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b080f92..fcd6bfd 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1731,7 +1731,7 @@
           // Stack masks
           stats_.AddBits(
               Stats::kByteKindCodeInfoStackMasks,
-              code_info.stack_masks_.DataBitSize());
+              code_info.stack_masks_.size_in_bits());
 
           // Register masks
           stats_.AddBits(
diff --git a/runtime/oat.h b/runtime/oat.h
index 8069a15..7b8f71a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: Optimize masks in stack maps.
-  static constexpr uint8_t kOatVersion[] = { '1', '4', '5', '\0' };
+  // Last oat version changed reason: Refactor stackmap encoding.
+  static constexpr uint8_t kOatVersion[] = { '1', '4', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 2648920..de613d3 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -439,7 +439,7 @@
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (bit < stack_mask.size_in_bits() && stack_mask.LoadBit(bit)) {
+          if (bit < code_info.GetNumberOfStackMaskBits() && stack_mask.LoadBit(bit)) {
             is_reference = true;
           }
           break;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index fd0e28d..2b7e8dd 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -200,7 +200,7 @@
       << std::dec
       << ", stack_mask=0b";
   BitMemoryRegion stack_mask = code_info.GetStackMaskOf(*this);
-  for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
+  for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(); i < e; ++i) {
     vios->Stream() << stack_mask.LoadBit(e - i - 1);
   }
   vios->Stream() << ")\n";
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 02d8713..91cecf0 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -799,24 +799,6 @@
   }
 };
 
-// Register masks tend to have many tailing zero bits,
-// therefore it is worth encoding them as value+shift.
-class RegisterMask : public BitTable<2>::Accessor {
- public:
-  enum Field {
-    kValue,
-    kShift,
-    kCount,
-  };
-
-  RegisterMask(const BitTable<kCount>* table, uint32_t row)
-    : BitTable<kCount>::Accessor(table, row) {}
-
-  ALWAYS_INLINE uint32_t GetMask() const {
-    return Get<kValue>() << Get<kShift>();
-  }
-};
-
 /**
  * Wrapper around all compiler information collected for a method.
  * The information is of the form:
@@ -851,22 +833,24 @@
     return DexRegisterLocationCatalog(location_catalog_);
   }
 
+  ALWAYS_INLINE size_t GetNumberOfStackMaskBits() const {
+    return stack_mask_bits_;
+  }
+
   ALWAYS_INLINE StackMap GetStackMapAt(size_t index) const {
     return StackMap(&stack_maps_, index);
   }
 
   BitMemoryRegion GetStackMask(size_t index) const {
-    return stack_masks_.GetBitMemoryRegion(index);
+    return stack_masks_.Subregion(index * stack_mask_bits_, stack_mask_bits_);
   }
 
   BitMemoryRegion GetStackMaskOf(const StackMap& stack_map) const {
-    uint32_t index = stack_map.GetStackMaskIndex();
-    return (index == StackMap::kNoValue) ? BitMemoryRegion() : GetStackMask(index);
+    return GetStackMask(stack_map.GetStackMaskIndex());
   }
 
   uint32_t GetRegisterMaskOf(const StackMap& stack_map) const {
-    uint32_t index = stack_map.GetRegisterMaskIndex();
-    return (index == StackMap::kNoValue) ? 0 : RegisterMask(&register_masks_, index).GetMask();
+    return register_masks_.Get(stack_map.GetRegisterMaskIndex());
   }
 
   uint32_t GetNumberOfLocationCatalogEntries() const {
@@ -1061,8 +1045,8 @@
     invoke_infos_.Decode(bit_region, &bit_offset);
     inline_infos_.Decode(bit_region, &bit_offset);
     register_masks_.Decode(bit_region, &bit_offset);
-    stack_masks_.Decode(bit_region, &bit_offset);
-    CHECK_EQ(BitsToBytesRoundUp(bit_offset), non_header_size);
+    stack_mask_bits_ = DecodeVarintBits(bit_region, &bit_offset);
+    stack_masks_ = bit_region.Subregion(bit_offset, non_header_size * kBitsPerByte - bit_offset);
   }
 
   size_t size_;
@@ -1072,8 +1056,9 @@
   BitTable<StackMap::Field::kCount> stack_maps_;
   BitTable<InvokeInfo::Field::kCount> invoke_infos_;
   BitTable<InlineInfo::Field::kCount> inline_infos_;
-  BitTable<RegisterMask::Field::kCount> register_masks_;
-  BitTable<1> stack_masks_;
+  BitTable<1> register_masks_;
+  uint32_t stack_mask_bits_ = 0;
+  BitMemoryRegion stack_masks_;
 
   friend class OatDumper;
 };
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2e737f5..129bae6 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3568,8 +3568,9 @@
       T vreg_info(m, code_info, map, visitor_);
 
       // Visit stack entries that hold pointers.
+      const size_t number_of_bits = code_info.GetNumberOfStackMaskBits();
       BitMemoryRegion stack_mask = code_info.GetStackMaskOf(map);
-      for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+      for (size_t i = 0; i < number_of_bits; ++i) {
         if (stack_mask.LoadBit(i)) {
           StackReference<mirror::Object>* ref_addr = vreg_base + i;
           mirror::Object* ref = ref_addr->AsMirrorPtr();