Stack maps: Refactor constructors.

Create dedicated static methods for each partial-decoding use case instead of
passing flags to the constructor, and merge construction and decoding into a
single optimized method.
This speeds up CodeInfo decoding by 10%, and maps startup by 0.1%.
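
For illustration, a minimal before/after sketch of a typical call site (the
exact call-site changes are in the diff below; `method_header` stands in for
whatever header pointer the caller already has):

  // Before: the decoding scope was selected by a flag passed to the constructor.
  CodeInfo code_info(method_header, CodeInfo::DecodeFlags::InlineInfoOnly);

  // After: each partial decoding has a dedicated static method.
  CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(method_header);
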
Test: ./art/test.py -b --host
Change-Id: Ic7d43e22bca0be9fb13bc2c7544ebfdf46798cfe
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 71196d4..17be7ec 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -205,7 +205,7 @@
DCHECK(current_code->IsOptimized());
if (CodeInfo::HasInlineInfo(current_code->GetOptimizedCodeInfoPtr())) {
uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
- CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
+ CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(current_code);
StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
DCHECK(stack_map.IsValid());
BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9971f4a..7ac76ab 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -343,7 +343,7 @@
uintptr_t outer_pc_offset = current_code->NativeQuickPcOffset(outer_pc);
if (current_code->IsOptimized()) {
- CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
+ CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(current_code);
StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset);
DCHECK(stack_map.IsValid());
BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index 3ed2a91..7f47398 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -33,7 +33,7 @@
return dex::kDexNoIndex;
} else {
DCHECK(IsOptimized());
- CodeInfo code_info(this, CodeInfo::DecodeFlags::InlineInfoOnly);
+ CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(this);
StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset);
if (stack_map.IsValid()) {
return stack_map.GetDexPc();
@@ -58,7 +58,7 @@
DCHECK(!method->IsNative());
DCHECK(IsOptimized());
// Search for the dex-to-pc mapping in stack maps.
- CodeInfo code_info(this, CodeInfo::DecodeFlags::InlineInfoOnly);
+ CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(this);
// All stack maps are stored in the same CodeItem section, safepoint stack
// maps first, then catch stack maps. We use `is_for_catch_handler` to select
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 172fe3e..3fc6fd1 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -82,7 +82,7 @@
DCHECK(!(*cur_quick_frame_)->IsNative());
const OatQuickMethodHeader* header = GetCurrentOatQuickMethodHeader();
if (cur_inline_info_.first != header) {
- cur_inline_info_ = std::make_pair(header, CodeInfo(header, CodeInfo::InlineInfoOnly));
+ cur_inline_info_ = std::make_pair(header, CodeInfo::DecodeInlineInfoOnly(header));
}
return &cur_inline_info_.second;
}
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index eebca85..d813fd5 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -27,34 +27,66 @@
namespace art {
-CodeInfo::CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags)
- : CodeInfo(header->GetOptimizedCodeInfoPtr(), flags) {
-}
-
-void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
+// The callback is used to inform the caller about memory bounds of the bit-tables.
+template<typename DecodeCallback>
+CodeInfo::CodeInfo(const uint8_t* data, size_t* num_read_bits, DecodeCallback callback) {
BitMemoryReader reader(data);
std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
ForEachHeaderField([this, &header](size_t i, auto member_pointer) {
this->*member_pointer = header[i];
});
- ForEachBitTableField([this, &reader](size_t i, auto member_pointer) {
+ ForEachBitTableField([this, &reader, &callback](size_t i, auto member_pointer) {
auto& table = this->*member_pointer;
if (LIKELY(HasBitTable(i))) {
if (UNLIKELY(IsBitTableDeduped(i))) {
ssize_t bit_offset = reader.NumberOfReadBits() - reader.ReadVarint();
BitMemoryReader reader2(reader.data(), bit_offset); // The offset is negative.
table.Decode(reader2);
+ callback(i, &table, reader2.GetReadRegion());
} else {
+ ssize_t bit_offset = reader.NumberOfReadBits();
table.Decode(reader);
+ callback(i, &table, reader.GetReadRegion().Subregion(bit_offset));
}
}
- }, flags);
- size_in_bits_ = reader.NumberOfReadBits();
- if (flags == AllTables) {
- DCHECK_EQ(HasInlineInfo(data), HasInlineInfo());
+ });
+ if (num_read_bits != nullptr) {
+ *num_read_bits = reader.NumberOfReadBits();
}
}
+CodeInfo::CodeInfo(const uint8_t* data, size_t* num_read_bits)
+ : CodeInfo(data, num_read_bits, [](size_t, auto*, BitMemoryRegion){}) {}
+
+CodeInfo::CodeInfo(const OatQuickMethodHeader* header)
+ : CodeInfo(header->GetOptimizedCodeInfoPtr()) {}
+
+QuickMethodFrameInfo CodeInfo::DecodeFrameInfo(const uint8_t* data) {
+ CodeInfo code_info(data);
+ return QuickMethodFrameInfo(code_info.packed_frame_size_ * kStackAlignment,
+ code_info.core_spill_mask_,
+ code_info.fp_spill_mask_);
+}
+
+CodeInfo CodeInfo::DecodeGcMasksOnly(const OatQuickMethodHeader* header) {
+ CodeInfo code_info(header->GetOptimizedCodeInfoPtr());
+ CodeInfo copy; // Copy to dead-code-eliminate all fields that we do not need.
+ copy.stack_maps_ = code_info.stack_maps_;
+ copy.register_masks_ = code_info.register_masks_;
+ copy.stack_masks_ = code_info.stack_masks_;
+ return copy;
+}
+
+CodeInfo CodeInfo::DecodeInlineInfoOnly(const OatQuickMethodHeader* header) {
+ CodeInfo code_info(header->GetOptimizedCodeInfoPtr());
+ CodeInfo copy; // Copy to dead-code-eliminate all fields that we do not need.
+ copy.number_of_dex_registers_ = code_info.number_of_dex_registers_;
+ copy.stack_maps_ = code_info.stack_maps_;
+ copy.inline_infos_ = code_info.inline_infos_;
+ copy.method_infos_ = code_info.method_infos_;
+ return copy;
+}
+
size_t CodeInfo::Deduper::Dedupe(const uint8_t* code_info_data) {
writer_.ByteAlign();
size_t deduped_offset = writer_.NumberOfWrittenBits() / kBitsPerByte;
@@ -64,27 +96,16 @@
// Read the existing code info and find (and keep) dedup-map iterator for each table.
// The iterator stores BitMemoryRegion and bit_offset of previous identical BitTable.
- BitMemoryReader reader(code_info_data);
- CodeInfo code_info; // Temporary storage for decoded data.
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
- ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
- code_info.*member_pointer = header[i];
- });
std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less>::iterator it[kNumBitTables];
- ForEachBitTableField([this, &reader, &code_info, &it](size_t i, auto member_pointer) {
- DCHECK(!code_info.IsBitTableDeduped(i));
- if (code_info.HasBitTable(i)) {
- size_t bit_table_start = reader.NumberOfReadBits();
- (code_info.*member_pointer).Decode(reader);
- BitMemoryRegion region = reader.GetReadRegion().Subregion(bit_table_start);
- it[i] = dedupe_map_.emplace(region, /* default bit_offset */ 0).first;
- if (it[i]->second != 0 && region.size_in_bits() > kMinDedupSize) { // Seen before and large?
- code_info.SetBitTableDeduped(i); // Mark as deduped before we write header.
- }
+ CodeInfo code_info(code_info_data, nullptr, [&](size_t i, auto*, BitMemoryRegion region) {
+ it[i] = dedupe_map_.emplace(region, /*bit_offset=*/0).first;
+ if (it[i]->second != 0 && region.size_in_bits() > kMinDedupSize) { // Seen before and large?
+ code_info.SetBitTableDeduped(i); // Mark as deduped before we write header.
}
});
// Write the code info back, but replace deduped tables with relative offsets.
+ std::array<uint32_t, kNumHeaders> header;
ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
header[i] = code_info.*member_pointer;
});
@@ -119,17 +140,15 @@
return deduped_offset;
}
-BitTable<StackMap>::const_iterator CodeInfo::BinarySearchNativePc(uint32_t packed_pc) const {
- return std::partition_point(
+StackMap CodeInfo::GetStackMapForNativePcOffset(uint32_t pc, InstructionSet isa) const {
+ uint32_t packed_pc = StackMap::PackNativePc(pc, isa);
+ // Binary search. All catch stack maps are stored separately at the end.
+ auto it = std::partition_point(
stack_maps_.begin(),
stack_maps_.end(),
[packed_pc](const StackMap& sm) {
return sm.GetPackedNativePc() < packed_pc && sm.GetKind() != StackMap::Kind::Catch;
});
-}
-
-StackMap CodeInfo::GetStackMapForNativePcOffset(uint32_t pc, InstructionSet isa) const {
- auto it = BinarySearchNativePc(StackMap::PackNativePc(pc, isa));
// Start at the lower bound and iterate over all stack maps with the given native pc.
for (; it != stack_maps_.end() && (*it).GetNativePcOffset(isa) == pc; ++it) {
StackMap::Kind kind = static_cast<StackMap::Kind>((*it).GetKind());
@@ -207,34 +226,23 @@
void CodeInfo::CollectSizeStats(const uint8_t* code_info_data, /*out*/ Stats* parent) {
Stats* codeinfo_stats = parent->Child("CodeInfo");
BitMemoryReader reader(code_info_data);
- CodeInfo code_info; // Temporary storage for decoded tables.
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
- ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
- code_info.*member_pointer = header[i];
- });
+ reader.ReadInterleavedVarints<kNumHeaders>();
codeinfo_stats->Child("Header")->AddBits(reader.NumberOfReadBits());
- ForEachBitTableField([codeinfo_stats, &reader, &code_info](size_t i, auto member_pointer) {
- auto& table = code_info.*member_pointer;
- size_t bit_offset = reader.NumberOfReadBits();
- if (code_info.HasBitTable(i)) {
- if (code_info.IsBitTableDeduped(i)) {
- reader.ReadVarint();
- codeinfo_stats->Child("DedupeOffset")->AddBits(reader.NumberOfReadBits() - bit_offset);
- } else {
- table.Decode(reader);
- Stats* table_stats = codeinfo_stats->Child(table.GetName());
- table_stats->AddBits(reader.NumberOfReadBits() - bit_offset);
- const char* const* column_names = table.GetColumnNames();
- for (size_t c = 0; c < table.NumColumns(); c++) {
- if (table.NumColumnBits(c) > 0) {
- Stats* column_stats = table_stats->Child(column_names[c]);
- column_stats->AddBits(table.NumRows() * table.NumColumnBits(c), table.NumRows());
- }
+ size_t num_bits;
+ CodeInfo code_info(code_info_data, &num_bits, [&](size_t i, auto* table, BitMemoryRegion region) {
+ if (!code_info.IsBitTableDeduped(i)) {
+ Stats* table_stats = codeinfo_stats->Child(table->GetName());
+ table_stats->AddBits(region.size_in_bits());
+ const char* const* column_names = table->GetColumnNames();
+ for (size_t c = 0; c < table->NumColumns(); c++) {
+ if (table->NumColumnBits(c) > 0) {
+ Stats* column_stats = table_stats->Child(column_names[c]);
+ column_stats->AddBits(table->NumRows() * table->NumColumnBits(c), table->NumRows());
}
}
}
});
- codeinfo_stats->AddBytes(BitsToBytesRoundUp(reader.NumberOfReadBits()));
+ codeinfo_stats->AddBytes(BitsToBytesRoundUp(num_bits));
}
void DexRegisterMap::Dump(VariableIndentationOutputStream* vios) const {
@@ -254,7 +262,7 @@
uint32_t code_offset,
bool verbose,
InstructionSet instruction_set) const {
- vios->Stream() << "CodeInfo BitSize=" << size_in_bits_
+ vios->Stream() << "CodeInfo "
<< " FrameSize:" << packed_frame_size_ * kStackAlignment
<< " CoreSpillMask:" << std::hex << core_spill_mask_
<< " FpSpillMask:" << std::hex << fp_spill_mask_
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index b438074..598f3e4 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -291,26 +291,14 @@
std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> dedupe_map_;
};
- enum DecodeFlags {
- AllTables = 0,
- // Limits the decoding only to the data needed by GC.
- GcMasksOnly = 1,
- // Limits the decoding only to the main stack map table and inline info table.
- // This is sufficient for many use cases and makes the header decoding faster.
- InlineInfoOnly = 2,
- };
+ ALWAYS_INLINE CodeInfo() {}
+ ALWAYS_INLINE explicit CodeInfo(const uint8_t* data, size_t* num_read_bits = nullptr);
+ ALWAYS_INLINE explicit CodeInfo(const OatQuickMethodHeader* header);
- CodeInfo() {}
-
- explicit CodeInfo(const uint8_t* data, DecodeFlags flags = AllTables) {
- Decode(reinterpret_cast<const uint8_t*>(data), flags);
- }
-
- explicit CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags = AllTables);
-
- size_t Size() const {
- return BitsToBytesRoundUp(size_in_bits_);
- }
+ // The following methods decode only part of the data.
+ static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* data);
+ static CodeInfo DecodeGcMasksOnly(const OatQuickMethodHeader* header);
+ static CodeInfo DecodeInlineInfoOnly(const OatQuickMethodHeader* header);
ALWAYS_INLINE const BitTable<StackMap>& GetStackMaps() const {
return stack_maps_;
@@ -441,23 +429,14 @@
return (*code_info_data & kHasInlineInfo) != 0;
}
- ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* code_info_data) {
- BitMemoryReader reader(code_info_data);
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
- return QuickMethodFrameInfo(header[1] * kStackAlignment, header[2], header[3]);
- }
-
private:
- // Returns lower bound (fist stack map which has pc greater or equal than the desired one).
- // It ignores catch stack maps at the end (it is the same as if they had maximum pc value).
- ALWAYS_INLINE BitTable<StackMap>::const_iterator BinarySearchNativePc(uint32_t packed_pc) const;
-
// Scan backward to determine dex register locations at given stack map.
void DecodeDexRegisterMap(uint32_t stack_map_index,
uint32_t first_dex_register,
/*out*/ DexRegisterMap* map) const;
- void Decode(const uint8_t* data, DecodeFlags flags);
+ template<typename DecodeCallback> // (size_t index, BitTable<...>*, BitMemoryRegion).
+ ALWAYS_INLINE CodeInfo(const uint8_t* data, size_t* num_read_bits, DecodeCallback callback);
// Invokes the callback with index and member pointer of each header field.
template<typename Callback>
@@ -474,19 +453,13 @@
// Invokes the callback with index and member pointer of each BitTable field.
template<typename Callback>
- ALWAYS_INLINE static void ForEachBitTableField(Callback callback, DecodeFlags flags = AllTables) {
+ ALWAYS_INLINE static void ForEachBitTableField(Callback callback) {
size_t index = 0;
callback(index++, &CodeInfo::stack_maps_);
callback(index++, &CodeInfo::register_masks_);
callback(index++, &CodeInfo::stack_masks_);
- if (flags & DecodeFlags::GcMasksOnly) {
- return;
- }
callback(index++, &CodeInfo::inline_infos_);
callback(index++, &CodeInfo::method_infos_);
- if (flags & DecodeFlags::InlineInfoOnly) {
- return;
- }
callback(index++, &CodeInfo::dex_register_masks_);
callback(index++, &CodeInfo::dex_register_maps_);
callback(index++, &CodeInfo::dex_register_catalog_);
@@ -522,8 +495,6 @@
BitTable<DexRegisterMapInfo> dex_register_maps_;
BitTable<DexRegisterInfo> dex_register_catalog_;
- uint32_t size_in_bits_ = 0;
-
friend class StackMapStream;
};
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6ff4c71..dcc015c 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3766,9 +3766,9 @@
StackReference<mirror::Object>* vreg_base =
reinterpret_cast<StackReference<mirror::Object>*>(cur_quick_frame);
uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
- CodeInfo code_info(method_header, kPrecise
- ? CodeInfo::DecodeFlags::AllTables // We will need dex register maps.
- : CodeInfo::DecodeFlags::GcMasksOnly);
+ CodeInfo code_info = kPrecise
+ ? CodeInfo(method_header) // We will need dex register maps.
+ : CodeInfo::DecodeGcMasksOnly(method_header);
StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
DCHECK(map.IsValid());