Add experiment for analyzing debug info
Analyze entropy of debug info to estimates a close upper bound
savings from huffman encoding debug infos.
Also measure how many debug info bytes are dedupable if you exclude
the line number and parameter names.
Sample output:
Debug info bytes 96101012(5.70%)
DBG_END_SEQUENCE: 6401069(0.38%)
DBG_ADVANCE_PC: 3709064(0.22%)
DBG_ADVANCE_LINE: 8620724(0.51%)
DBG_START_LOCAL: 5244232(0.31%)
DBG_START_LOCAL_EXTENDED: 1763845(0.10%)
DBG_END_LOCAL: 1216044(0.07%)
DBG_RESTART_LOCAL: 565412(0.03%)
DBG_SET_PROLOGUE bytes 5768714(0.34%)
DBG_SET_FILE bytes 0(0.00%)
special: 36310220(2.15%)
Debug info entropy 69199724(4.10%)
Debug info opcode bytes 55613021(3.30%)
Debug info opcode entropy 34792401(2.06%)
Debug info non header bytes 69599324(4.13%)
Debug info deduped non header bytes 52475493(3.11%)
Bug: 77721545
Test: test-art-host
Change-Id: I031322e3b79a1572fcbb9e513ded9708e3b48354
diff --git a/tools/dexanalyze/dexanalyze.cc b/tools/dexanalyze/dexanalyze.cc
index 38725d4..7d7e5f2 100644
--- a/tools/dexanalyze/dexanalyze.cc
+++ b/tools/dexanalyze/dexanalyze.cc
@@ -49,6 +49,8 @@
<< "Usage " << argv[0] << " [options] <dex files>\n"
<< " [options] is a combination of the following\n"
<< " -count_indices (Count dex indices accessed from code items)\n"
+ << " -analyze-strings (Analyze string data)\n"
+ << " -analyze-debug-info (Analyze debug info)\n"
<< " -i (Ignore Dex checksum and verification failures)\n"
<< " -a (Run all experiments)\n"
<< " -d (Dump on per DEX basis)\n";
@@ -69,6 +71,8 @@
exp_count_indices_ = true;
} else if (arg == "-analyze-strings") {
exp_analyze_strings_ = true;
+ } else if (arg == "-analyze-debug-info") {
+ exp_debug_info_ = true;
} else if (arg == "-d") {
dump_per_input_dex_ = true;
} else if (!arg.empty() && arg[0] == '-') {
@@ -90,6 +94,7 @@
bool exp_count_indices_ = false;
bool exp_code_metrics_ = false;
bool exp_analyze_strings_ = false;
+ bool exp_debug_info_ = false;
bool run_all_experiments_ = false;
std::vector<std::string> filenames_;
};
@@ -106,6 +111,9 @@
if (options->run_all_experiments_ || options->exp_code_metrics_) {
experiments_.emplace_back(new CodeMetrics);
}
+ if (options->run_all_experiments_ || options->exp_debug_info_) {
+ experiments_.emplace_back(new AnalyzeDebugInfo);
+ }
}
bool ProcessDexFile(const DexFile& dex_file) {
@@ -120,6 +128,7 @@
void Dump(std::ostream& os) {
for (std::unique_ptr<Experiment>& experiment : experiments_) {
experiment->Dump(os, total_size_);
+ os << "\n";
}
}
diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc
index 7006370..1a3b89c 100644
--- a/tools/dexanalyze/dexanalyze_experiments.cc
+++ b/tools/dexanalyze/dexanalyze_experiments.cc
@@ -75,6 +75,128 @@
return len;
}
+void AnalyzeDebugInfo::ProcessDexFile(const DexFile& dex_file) {
+ std::set<const uint8_t*> seen;
+ std::vector<size_t> counts(256, 0u);
+ std::vector<size_t> opcode_counts(256, 0u);
+ std::set<std::vector<uint8_t>> unique_non_header;
+ for (ClassAccessor accessor : dex_file.GetClasses()) {
+ for (const ClassAccessor::Method& method : accessor.GetMethods()) {
+ CodeItemDebugInfoAccessor code_item(dex_file, method.GetCodeItem(), method.GetIndex());
+ const uint8_t* debug_info = dex_file.GetDebugInfoStream(code_item.DebugInfoOffset());
+ if (debug_info != nullptr && seen.insert(debug_info).second) {
+ const uint8_t* stream = debug_info;
+ DecodeUnsignedLeb128(&stream); // line_start
+ uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+ for (uint32_t i = 0; i < parameters_size; ++i) {
+ DecodeUnsignedLeb128P1(&stream); // Parameter name.
+ }
+ bool done = false;
+ const uint8_t* after_header_start = stream;
+ while (!done) {
+ const uint8_t* const op_start = stream;
+ uint8_t opcode = *stream++;
+ ++opcode_counts[opcode];
+ ++total_opcode_bytes_;
+ switch (opcode) {
+ case DexFile::DBG_END_SEQUENCE:
+ ++total_end_seq_bytes_;
+ done = true;
+ break;
+ case DexFile::DBG_ADVANCE_PC:
+ DecodeUnsignedLeb128(&stream); // addr_diff
+ total_advance_pc_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_ADVANCE_LINE:
+ DecodeSignedLeb128(&stream); // line_diff
+ total_advance_line_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_START_LOCAL:
+ DecodeUnsignedLeb128(&stream); // register_num
+ DecodeUnsignedLeb128P1(&stream); // name_idx
+ DecodeUnsignedLeb128P1(&stream); // type_idx
+ total_start_local_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_START_LOCAL_EXTENDED:
+ DecodeUnsignedLeb128(&stream); // register_num
+ DecodeUnsignedLeb128P1(&stream); // name_idx
+ DecodeUnsignedLeb128P1(&stream); // type_idx
+ DecodeUnsignedLeb128P1(&stream); // sig_idx
+ total_start_local_extended_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_END_LOCAL:
+ DecodeUnsignedLeb128(&stream); // register_num
+ total_end_local_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_RESTART_LOCAL:
+ DecodeUnsignedLeb128(&stream); // register_num
+ total_restart_local_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_SET_PROLOGUE_END:
+ case DexFile::DBG_SET_EPILOGUE_BEGIN:
+ total_epilogue_bytes_ += stream - op_start;
+ break;
+ case DexFile::DBG_SET_FILE: {
+ DecodeUnsignedLeb128P1(&stream); // name_idx
+ total_set_file_bytes_ += stream - op_start;
+ break;
+ }
+ default: {
+ total_other_bytes_ += stream - op_start;
+ break;
+ }
+ }
+ }
+ const size_t bytes = stream - debug_info;
+ total_bytes_ += bytes;
+ total_non_header_bytes_ += stream - after_header_start;
+ if (unique_non_header.insert(std::vector<uint8_t>(after_header_start, stream)).second) {
+ total_unique_non_header_bytes_ += stream - after_header_start;
+ }
+ for (size_t i = 0; i < bytes; ++i) {
+ ++counts[debug_info[i]];
+ }
+ }
+ }
+ }
+ auto calc_entropy = [](std::vector<size_t> data) {
+ size_t total = std::accumulate(data.begin(), data.end(), 0u);
+ double avg_entropy = 0.0;
+ for (size_t c : data) {
+ if (c > 0) {
+ double ratio = static_cast<double>(c) / static_cast<double>(total);
+ avg_entropy -= ratio * log(ratio) / log(256.0);
+ }
+ }
+ return avg_entropy * total;
+ };
+ total_entropy_ += calc_entropy(counts);
+ total_opcode_entropy_ += calc_entropy(opcode_counts);
+}
+
+void AnalyzeDebugInfo::Dump(std::ostream& os, uint64_t total_size) const {
+ os << "Debug info bytes " << Percent(total_bytes_, total_size) << "\n";
+
+ os << " DBG_END_SEQUENCE: " << Percent(total_end_seq_bytes_, total_size) << "\n";
+ os << " DBG_ADVANCE_PC: " << Percent(total_advance_pc_bytes_, total_size) << "\n";
+ os << " DBG_ADVANCE_LINE: " << Percent(total_advance_line_bytes_, total_size) << "\n";
+ os << " DBG_START_LOCAL: " << Percent(total_start_local_bytes_, total_size) << "\n";
+ os << " DBG_START_LOCAL_EXTENDED: "
+ << Percent(total_start_local_extended_bytes_, total_size) << "\n";
+ os << " DBG_END_LOCAL: " << Percent(total_end_local_bytes_, total_size) << "\n";
+ os << " DBG_RESTART_LOCAL: " << Percent(total_restart_local_bytes_, total_size) << "\n";
+ os << " DBG_SET_PROLOGUE bytes " << Percent(total_epilogue_bytes_, total_size) << "\n";
+ os << " DBG_SET_FILE bytes " << Percent(total_set_file_bytes_, total_size) << "\n";
+ os << " special: "
+ << Percent(total_other_bytes_, total_size) << "\n";
+ os << "Debug info entropy " << Percent(total_entropy_, total_size) << "\n";
+ os << "Debug info opcode bytes " << Percent(total_opcode_bytes_, total_size) << "\n";
+ os << "Debug info opcode entropy " << Percent(total_opcode_entropy_, total_size) << "\n";
+ os << "Debug info non header bytes " << Percent(total_non_header_bytes_, total_size) << "\n";
+ os << "Debug info deduped non header bytes "
+ << Percent(total_unique_non_header_bytes_, total_size) << "\n";
+}
+
void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) {
std::vector<std::string> strings;
for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
diff --git a/tools/dexanalyze/dexanalyze_experiments.h b/tools/dexanalyze/dexanalyze_experiments.h
index 7ba2a49..a2621c8 100644
--- a/tools/dexanalyze/dexanalyze_experiments.h
+++ b/tools/dexanalyze/dexanalyze_experiments.h
@@ -51,6 +51,32 @@
int64_t total_num_prefixes_ = 0u;
};
+// Analyze debug info sizes.
+class AnalyzeDebugInfo : public Experiment {
+ public:
+ void ProcessDexFile(const DexFile& dex_file);
+ void Dump(std::ostream& os, uint64_t total_size) const;
+
+ private:
+ int64_t total_bytes_ = 0u;
+ int64_t total_entropy_ = 0u;
+ int64_t total_opcode_bytes_ = 0u;
+ int64_t total_opcode_entropy_ = 0u;
+ int64_t total_non_header_bytes_ = 0u;
+ int64_t total_unique_non_header_bytes_ = 0u;
+ // Opcode and related data.
+ int64_t total_end_seq_bytes_ = 0u;
+ int64_t total_advance_pc_bytes_ = 0u;
+ int64_t total_advance_line_bytes_ = 0u;
+ int64_t total_start_local_bytes_ = 0u;
+ int64_t total_start_local_extended_bytes_ = 0u;
+ int64_t total_end_local_bytes_ = 0u;
+ int64_t total_restart_local_bytes_ = 0u;
+ int64_t total_epilogue_bytes_ = 0u;
+ int64_t total_set_file_bytes_ = 0u;
+ int64_t total_other_bytes_ = 0u;
+};
+
// Count numbers of dex indices.
class CountDexIndices : public Experiment {
public: