Merge "Revert "Optimize register mask and stack mask in stack maps.""
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index b40ea37..c6e375a 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -48,6 +48,10 @@
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
}
+ if (sp_mask != nullptr) {
+ stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
+ }
+
current_dex_register_ = 0;
}
@@ -213,32 +217,11 @@
PrepareMethodIndices();
// Dedup stack masks. Needs to be done first as it modifies the stack map entry.
- BitmapTableBuilder stack_mask_builder(allocator_);
- for (StackMapEntry& stack_map : stack_maps_) {
- BitVector* mask = stack_map.sp_mask;
- size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
- if (num_bits != 0) {
- stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
- } else {
- stack_map.stack_mask_index = StackMap::kNoValue;
- }
- }
+ size_t stack_mask_bits = stack_mask_max_ + 1; // Need room for max element too.
+ size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
// Dedup register masks. Needs to be done first as it modifies the stack map entry.
- BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
- for (StackMapEntry& stack_map : stack_maps_) {
- uint32_t register_mask = stack_map.register_mask;
- if (register_mask != 0) {
- uint32_t shift = LeastSignificantBit(register_mask);
- std::array<uint32_t, RegisterMask::kCount> entry = {
- register_mask >> shift,
- shift,
- };
- stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
- } else {
- stack_map.register_mask_index = StackMap::kNoValue;
- }
- }
+ size_t num_register_masks = PrepareRegisterMasks();
// Write dex register maps.
MemoryRegion dex_register_map_region =
@@ -318,8 +301,31 @@
stack_map_builder.Encode(&out_, &bit_offset);
invoke_info_builder.Encode(&out_, &bit_offset);
inline_info_builder.Encode(&out_, &bit_offset);
+
+ // Write register masks table.
+ BitTableBuilder<uint32_t> register_mask_builder(allocator_);
+ for (size_t i = 0; i < num_register_masks; ++i) {
+ register_mask_builder.Add(register_masks_[i]);
+ }
register_mask_builder.Encode(&out_, &bit_offset);
- stack_mask_builder.Encode(&out_, &bit_offset);
+
+ // Write stack masks table.
+ EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
+ out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
+ BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
+ bit_offset,
+ stack_mask_bits * num_stack_masks);
+ if (stack_mask_bits > 0) {
+ for (size_t i = 0; i < num_stack_masks; ++i) {
+ size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
+ BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
+ BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
+ for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
+ size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
+ dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
+ }
+ }
+ }
return UnsignedLeb128Size(out_.size()) + out_.size();
}
@@ -442,6 +448,17 @@
}
}
+size_t StackMapStream::PrepareRegisterMasks() {
+ register_masks_.resize(stack_maps_.size(), 0u);
+ ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
+ for (StackMapEntry& stack_map : stack_maps_) {
+ const size_t index = dedupe.size();
+ stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
+ register_masks_[index] = stack_map.register_mask;
+ }
+ return dedupe.size();
+}
+
void StackMapStream::PrepareMethodIndices() {
CHECK(method_indices_.empty());
method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -464,10 +481,35 @@
method_indices_.resize(dedupe.size());
}
+
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+ // Preallocate memory since we do not want it to move (the dedup map will point into it).
+ const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+ // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
+ // when copying out from stack_masks_.
+ ScopedArenaUnorderedMap<MemoryRegion,
+ size_t,
+ FNVHash<MemoryRegion>,
+ MemoryRegion::ContentEquals> dedup(
+ stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
+ for (StackMapEntry& stack_map : stack_maps_) {
+ size_t index = dedup.size();
+ MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+ BitMemoryRegion stack_mask_bits(stack_mask);
+ for (size_t i = 0; i < entry_size_in_bits; i++) {
+ stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+ }
+ stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+ }
+ return dedup.size();
+}
+
// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CodeInfo code_info(region);
DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+ DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
size_t invoke_info_index = 0;
for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -480,15 +522,18 @@
DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
+ const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
if (entry.sp_mask != nullptr) {
DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
- for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
+ for (size_t b = 0; b < num_stack_mask_bits; b++) {
+ DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
}
} else {
- DCHECK_EQ(stack_mask.size_in_bits(), 0u);
+ for (size_t b = 0; b < num_stack_mask_bits; b++) {
+ DCHECK_EQ(stack_mask.LoadBit(b), 0u);
+ }
}
if (entry.dex_method_index != dex::kDexNoIndex) {
InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 19863d8..ea97cf6 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,8 +68,11 @@
location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+ stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+ stack_mask_max_(-1),
out_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
allocator->Adapter(kArenaAllocStackMapStream)),
@@ -168,6 +171,12 @@
private:
size_t ComputeDexRegisterLocationCatalogSize() const;
+ // Returns the number of unique stack masks.
+ size_t PrepareStackMasks(size_t entry_size_in_bits);
+
+ // Returns the number of unique register masks.
+ size_t PrepareRegisterMasks();
+
// Prepare and deduplicate method indices.
void PrepareMethodIndices();
@@ -208,8 +217,11 @@
// A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
ScopedArenaVector<size_t> dex_register_locations_;
ScopedArenaVector<InlineInfoEntry> inline_infos_;
+ ScopedArenaVector<uint8_t> stack_masks_;
+ ScopedArenaVector<uint32_t> register_masks_;
ScopedArenaVector<uint32_t> method_indices_;
ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
+ int stack_mask_max_;
ScopedArenaVector<uint8_t> out_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index c372bb9..9db7588 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -32,10 +32,10 @@
const StackMap& stack_map,
const BitVector& bit_vector) {
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
- if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
+ if (bit_vector.GetNumberOfBits() > code_info.GetNumberOfStackMaskBits()) {
return false;
}
- for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+ for (size_t i = 0; i < code_info.GetNumberOfStackMaskBits(); ++i) {
if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
}
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b080f92..fcd6bfd 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1731,7 +1731,7 @@
// Stack masks
stats_.AddBits(
Stats::kByteKindCodeInfoStackMasks,
- code_info.stack_masks_.DataBitSize());
+ code_info.stack_masks_.size_in_bits());
// Register masks
stats_.AddBits(
diff --git a/runtime/oat.h b/runtime/oat.h
index 8069a15..7b8f71a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- // Last oat version changed reason: Optimize masks in stack maps.
- static constexpr uint8_t kOatVersion[] = { '1', '4', '5', '\0' };
+ // Last oat version changed reason: Refactor stackmap encoding.
+ static constexpr uint8_t kOatVersion[] = { '1', '4', '4', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 2648920..de613d3 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -439,7 +439,7 @@
const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
value = *reinterpret_cast<const uint32_t*>(addr);
uint32_t bit = (offset >> 2);
- if (bit < stack_mask.size_in_bits() && stack_mask.LoadBit(bit)) {
+ if (bit < code_info.GetNumberOfStackMaskBits() && stack_mask.LoadBit(bit)) {
is_reference = true;
}
break;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index fd0e28d..2b7e8dd 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -200,7 +200,7 @@
<< std::dec
<< ", stack_mask=0b";
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(*this);
- for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
+ for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(); i < e; ++i) {
vios->Stream() << stack_mask.LoadBit(e - i - 1);
}
vios->Stream() << ")\n";
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 02d8713..91cecf0 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -799,24 +799,6 @@
}
};
-// Register masks tend to have many tailing zero bits,
-// therefore it is worth encoding them as value+shift.
-class RegisterMask : public BitTable<2>::Accessor {
- public:
- enum Field {
- kValue,
- kShift,
- kCount,
- };
-
- RegisterMask(const BitTable<kCount>* table, uint32_t row)
- : BitTable<kCount>::Accessor(table, row) {}
-
- ALWAYS_INLINE uint32_t GetMask() const {
- return Get<kValue>() << Get<kShift>();
- }
-};
-
/**
* Wrapper around all compiler information collected for a method.
* The information is of the form:
@@ -851,22 +833,24 @@
return DexRegisterLocationCatalog(location_catalog_);
}
+ ALWAYS_INLINE size_t GetNumberOfStackMaskBits() const {
+ return stack_mask_bits_;
+ }
+
ALWAYS_INLINE StackMap GetStackMapAt(size_t index) const {
return StackMap(&stack_maps_, index);
}
BitMemoryRegion GetStackMask(size_t index) const {
- return stack_masks_.GetBitMemoryRegion(index);
+ return stack_masks_.Subregion(index * stack_mask_bits_, stack_mask_bits_);
}
BitMemoryRegion GetStackMaskOf(const StackMap& stack_map) const {
- uint32_t index = stack_map.GetStackMaskIndex();
- return (index == StackMap::kNoValue) ? BitMemoryRegion() : GetStackMask(index);
+ return GetStackMask(stack_map.GetStackMaskIndex());
}
uint32_t GetRegisterMaskOf(const StackMap& stack_map) const {
- uint32_t index = stack_map.GetRegisterMaskIndex();
- return (index == StackMap::kNoValue) ? 0 : RegisterMask(®ister_masks_, index).GetMask();
+ return register_masks_.Get(stack_map.GetRegisterMaskIndex());
}
uint32_t GetNumberOfLocationCatalogEntries() const {
@@ -1061,8 +1045,8 @@
invoke_infos_.Decode(bit_region, &bit_offset);
inline_infos_.Decode(bit_region, &bit_offset);
register_masks_.Decode(bit_region, &bit_offset);
- stack_masks_.Decode(bit_region, &bit_offset);
- CHECK_EQ(BitsToBytesRoundUp(bit_offset), non_header_size);
+ stack_mask_bits_ = DecodeVarintBits(bit_region, &bit_offset);
+ stack_masks_ = bit_region.Subregion(bit_offset, non_header_size * kBitsPerByte - bit_offset);
}
size_t size_;
@@ -1072,8 +1056,9 @@
BitTable<StackMap::Field::kCount> stack_maps_;
BitTable<InvokeInfo::Field::kCount> invoke_infos_;
BitTable<InlineInfo::Field::kCount> inline_infos_;
- BitTable<RegisterMask::Field::kCount> register_masks_;
- BitTable<1> stack_masks_;
+ BitTable<1> register_masks_;
+ uint32_t stack_mask_bits_ = 0;
+ BitMemoryRegion stack_masks_;
friend class OatDumper;
};
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2e737f5..129bae6 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3568,8 +3568,9 @@
T vreg_info(m, code_info, map, visitor_);
// Visit stack entries that hold pointers.
+ const size_t number_of_bits = code_info.GetNumberOfStackMaskBits();
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(map);
- for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+ for (size_t i = 0; i < number_of_bits; ++i) {
if (stack_mask.LoadBit(i)) {
StackReference<mirror::Object>* ref_addr = vreg_base + i;
mirror::Object* ref = ref_addr->AsMirrorPtr();