Add bytecode conversion experiments

Add a few new instructions and measure savings:
invoke-xx-[1-4]-[move-result].
iget/set-this
new-instance-invoke-this
if-eqz4
if-nez4
etc..

Cumulative Dex bytecode savings are ~14% of the Dex size.

Test: test-art-host
Bug: 77721545

Change-Id: Ibe2b04614e2c5782612b515fbb7f77dbcf9eb66c
diff --git a/libdexfile/dex/class_accessor-inl.h b/libdexfile/dex/class_accessor-inl.h
index a335f08..dd91438 100644
--- a/libdexfile/dex/class_accessor-inl.h
+++ b/libdexfile/dex/class_accessor-inl.h
@@ -110,6 +110,10 @@
   return CodeItemInstructionAccessor(dex_file_, dex_file_.GetCodeItem(GetCodeItemOffset()));
 }
 
+inline CodeItemDataAccessor ClassAccessor::Method::GetInstructionsAndData() const {
+  return CodeItemDataAccessor(dex_file_, dex_file_.GetCodeItem(GetCodeItemOffset()));
+}
+
 inline const char* ClassAccessor::GetDescriptor() const {
   return dex_file_.StringByTypeIdx(GetClassIdx());
 }
diff --git a/libdexfile/dex/class_accessor.h b/libdexfile/dex/class_accessor.h
index 34c7e07..0d87f93 100644
--- a/libdexfile/dex/class_accessor.h
+++ b/libdexfile/dex/class_accessor.h
@@ -78,6 +78,7 @@
     }
 
     CodeItemInstructionAccessor GetInstructions() const;
+    CodeItemDataAccessor GetInstructionsAndData() const;
 
     const DexFile::CodeItem* GetCodeItem() const;
 
diff --git a/tools/dexanalyze/Android.bp b/tools/dexanalyze/Android.bp
index a229d73..9515ca5 100644
--- a/tools/dexanalyze/Android.bp
+++ b/tools/dexanalyze/Android.bp
@@ -20,6 +20,7 @@
     host_supported: true,
     srcs: [
         "dexanalyze.cc",
+        "dexanalyze_bytecode.cc",
         "dexanalyze_experiments.cc",
     ],
     target: {
diff --git a/tools/dexanalyze/dexanalyze.cc b/tools/dexanalyze/dexanalyze.cc
index 7a9b8fb..c90bb9c 100644
--- a/tools/dexanalyze/dexanalyze.cc
+++ b/tools/dexanalyze/dexanalyze.cc
@@ -21,6 +21,7 @@
 
 #include <android-base/file.h>
 
+#include "dexanalyze_bytecode.h"
 #include "dexanalyze_experiments.h"
 #include "dex/code_item_accessors-inl.h"
 #include "dex/dex_file.h"
@@ -28,6 +29,7 @@
 #include "dex/dex_instruction-inl.h"
 
 namespace art {
+namespace dexanalyze {
 
 class DexAnalyze {
   static constexpr int kExitCodeUsageError = 1;
@@ -51,9 +53,12 @@
         << "    -count_indices (Count dex indices accessed from code items)\n"
         << "    -analyze-strings (Analyze string data)\n"
         << "    -analyze-debug-info (Analyze debug info)\n"
+        << "    -new-bytecode (Bytecode optimizations)\n"
         << "    -i (Ignore Dex checksum and verification failures)\n"
         << "    -a (Run all experiments)\n"
-        << "    -d (Dump on per DEX basis)\n";
+        << "    -n <int> (run experiment with 1 .. n as argument)\n"
+        << "    -d (Dump on per Dex basis)\n"
+        << "    -v (Verbose dumping)\n";
     return kExitCodeUsageError;
   }
 
@@ -65,14 +70,25 @@
         if (arg == "-i") {
           verify_checksum_ = false;
           run_dex_file_verifier_ = false;
+        } else if (arg == "-v") {
+          verbose_ = true;
         } else if (arg == "-a") {
           run_all_experiments_ = true;
+        } else if (arg == "-n") {
+          if (i + 1 >= argc) {
+            return Usage(argv);
+          }
+          std::istringstream iss(argv[i + 1]);
+          iss >> experiment_max_;
+          ++i;
         } else if (arg == "-count-indices") {
           exp_count_indices_ = true;
         } else if (arg == "-analyze-strings") {
           exp_analyze_strings_ = true;
         } else if (arg == "-analyze-debug-info") {
           exp_debug_info_ = true;
+        } else if (arg == "-new-bytecode") {
+          exp_bytecode_ = true;
         } else if (arg == "-d") {
           dump_per_input_dex_ = true;
         } else if (!arg.empty() && arg[0] == '-') {
@@ -88,6 +104,7 @@
       return 0;
     }
 
+    bool verbose_ = false;
     bool verify_checksum_ = true;
     bool run_dex_file_verifier_ = true;
     bool dump_per_input_dex_ = false;
@@ -95,7 +112,9 @@
     bool exp_code_metrics_ = false;
     bool exp_analyze_strings_ = false;
     bool exp_debug_info_ = false;
+    bool exp_bytecode_ = false;
     bool run_all_experiments_ = false;
+    uint64_t experiment_max_ = 1u;
     std::vector<std::string> filenames_;
   };
 
@@ -114,6 +133,22 @@
       if (options->run_all_experiments_ || options->exp_debug_info_) {
         experiments_.emplace_back(new AnalyzeDebugInfo);
       }
+      if (options->run_all_experiments_ || options->exp_bytecode_) {
+        for (size_t i = 0; i < options->experiment_max_; ++i) {
+          uint64_t exp_value = 0u;
+          if (i == 0) {
+            exp_value = std::numeric_limits<uint64_t>::max();
+          } else if (i == 1) {
+            exp_value = 0u;
+          } else {
+            exp_value = 1u << (i - 2);
+          }
+          experiments_.emplace_back(new NewRegisterInstructions(exp_value));
+        }
+      }
+      for (const std::unique_ptr<Experiment>& experiment : experiments_) {
+        experiment->dump_ = options->verbose_;
+      }
     }
 
     bool ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
@@ -188,9 +223,10 @@
   }
 };
 
+}  // namespace dexanalyze
 }  // namespace art
 
 int main(int argc, char** argv) {
-  return art::DexAnalyze::Run(argc, argv);
+  return art::dexanalyze::DexAnalyze::Run(argc, argv);
 }
 
diff --git a/tools/dexanalyze/dexanalyze_bytecode.cc b/tools/dexanalyze/dexanalyze_bytecode.cc
new file mode 100644
index 0000000..6bc8921
--- /dev/null
+++ b/tools/dexanalyze/dexanalyze_bytecode.cc
@@ -0,0 +1,547 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dexanalyze_bytecode.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+
+#include "dex/class_accessor-inl.h"
+#include "dex/code_item_accessors-inl.h"
+#include "dex/dex_instruction-inl.h"
+
+namespace art {
+namespace dexanalyze {
+
+// Given a map of <key, usage count>, sort by most used and assign index <key, index in most used>
+enum class Order {
+  kMostUsed,
+  kNormal,
+};
+
+template <typename T, typename U>
+static inline SafeMap<T, U> SortByOrder(const SafeMap<T, U>& usage, Order order) {
+  std::vector<std::pair<U, T>> most_used;
+  for (const auto& pair : usage) {
+    most_used.emplace_back(pair.second, pair.first);
+  }
+  if (order == Order::kMostUsed) {
+    std::sort(most_used.rbegin(), most_used.rend());
+  }
+  U current_index = 0u;
+  SafeMap<T, U> ret;
+  for (auto&& pair : most_used) {
+    CHECK(ret.emplace(pair.second, current_index++).second);
+  }
+  return ret;
+}
+
+static inline std::ostream& operator<<(std::ostream& os, const std::vector<uint8_t>& bytes) {
+  os << std::hex;
+  for (const uint8_t& c : bytes) {
+    os << std::setw(2) << std::setfill('0') << static_cast<uint32_t>(c)
+       << (&c != &bytes.back() ? " " : "");
+  }
+  os << std::dec;
+  return os;
+}
+
+void NewRegisterInstructions::ProcessDexFiles(
+    const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
+  std::set<std::vector<uint8_t>> deduped;
+  for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+    std::map<size_t, TypeLinkage> types;
+    std::set<const void*> visited;
+    for (ClassAccessor accessor : dex_file->GetClasses()) {
+      InstructionBuilder inst_builder(types,
+                                      /*count_types*/ true,
+                                      /*dump*/ false,
+                                      experiments_,
+                                      instruction_freq_);
+      for (const ClassAccessor::Method& method : accessor.GetMethods()) {
+        inst_builder.Process(*dex_file, method.GetInstructionsAndData(), accessor.GetClassIdx());
+      }
+    }
+    // Reorder to get an index for each map instead of a count.
+    for (auto&& pair : types) {
+      pair.second.types_ = SortByOrder(pair.second.types_, Order::kMostUsed);
+      pair.second.fields_ = SortByOrder(pair.second.fields_, Order::kMostUsed);
+      pair.second.methods_ = SortByOrder(pair.second.methods_, Order::kMostUsed);
+      pair.second.strings_ = SortByOrder(pair.second.strings_, Order::kMostUsed);
+    }
+    // Visit classes and convert code items.
+    for (ClassAccessor accessor : dex_file->GetClasses()) {
+      InstructionBuilder inst_builder(types,
+                                      /*count_types*/ false,
+                                      dump_,
+                                      experiments_,
+                                      instruction_freq_);
+      for (const ClassAccessor::Method& method : accessor.GetMethods()) {
+        if (method.GetCodeItem() == nullptr || !visited.insert(method.GetCodeItem()).second) {
+          continue;
+        }
+        if (dump_) {
+          std::cout << std::endl
+                    << "Processing " << dex_file->PrettyMethod(method.GetIndex(), true);
+        }
+        CodeItemDataAccessor data = method.GetInstructionsAndData();
+        inst_builder.Process(*dex_file, data, accessor.GetClassIdx());
+        std::vector<uint8_t> buffer = std::move(inst_builder.buffer_);
+        const size_t buffer_size = buffer.size();
+        dex_code_bytes_ += data.InsnsSizeInBytes();
+        output_size_ += buffer_size;
+        // Add extra data at the end to have fair dedupe.
+        EncodeUnsignedLeb128(&buffer, data.RegistersSize());
+        EncodeUnsignedLeb128(&buffer, data.InsSize());
+        EncodeUnsignedLeb128(&buffer, data.OutsSize());
+        EncodeUnsignedLeb128(&buffer, data.TriesSize());
+        EncodeUnsignedLeb128(&buffer, data.InsnsSizeInCodeUnits());
+        if (deduped.insert(buffer).second) {
+          deduped_size_ += buffer_size;
+        }
+      }
+      missing_field_idx_count_ += inst_builder.missing_field_idx_count_;
+      missing_method_idx_count_ += inst_builder.missing_method_idx_count_;
+    }
+  }
+}
+
+void NewRegisterInstructions::Dump(std::ostream& os, uint64_t total_size) const {
+  os << "Enabled experiments " << experiments_ << std::endl;
+  os << "Total Dex code bytes: " << Percent(dex_code_bytes_, total_size) << "\n";
+  os << "Total output code bytes: " << Percent(output_size_, total_size) << "\n";
+  os << "Total deduped code bytes: " << Percent(deduped_size_, total_size) << "\n";
+  os << "Missing field idx count: " << missing_field_idx_count_ << "\n";
+  os << "Missing method idx count: " << missing_method_idx_count_ << "\n";
+  std::vector<std::pair<size_t, std::vector<uint8_t>>> pairs;
+  for (auto&& pair : instruction_freq_) {
+    if (pair.second > 0 && !pair.first.empty()) {
+      // Savings exclude one byte per occurrence and one occurence from having the macro
+      // dictionary.
+      pairs.emplace_back((pair.second - 1) * (pair.first.size() - 1), pair.first);
+    }
+  }
+  std::sort(pairs.rbegin(), pairs.rend());
+  os << "Top instruction bytecode sizes and hex dump" << "\n";
+  uint64_t top_instructions_savings = 0u;
+  for (size_t i = 0; i < 128 && i < pairs.size(); ++i) {
+    top_instructions_savings += pairs[i].first;
+    if (dump_ || (true)) {
+      auto bytes = pairs[i].second;
+      // Remove opcode bytes.
+      bytes.erase(bytes.begin());
+      os << Percent(pairs[i].first, total_size) << " "
+         << Instruction::Name(static_cast<Instruction::Code>(pairs[i].second[0]))
+         << "(" << bytes << ")\n";
+    }
+  }
+  os << "Top instructions 1b macro savings "
+     << Percent(top_instructions_savings, total_size) << "\n";
+}
+
+InstructionBuilder::InstructionBuilder(std::map<size_t, TypeLinkage>& types,
+                                       bool count_types,
+                                       bool dump,
+                                       uint64_t experiments,
+                                       std::map<std::vector<uint8_t>, size_t>& instruction_freq)
+    : types_(types),
+      count_types_(count_types),
+      dump_(dump),
+      experiments_(experiments),
+      instruction_freq_(instruction_freq) {}
+
+void InstructionBuilder::Process(const DexFile& dex_file,
+                                 const CodeItemDataAccessor& code_item,
+                                 dex::TypeIndex current_class_type) {
+  TypeLinkage& current_type = types_[current_class_type.index_];
+  bool skip_next = false;
+  size_t last_start = 0u;
+  for (auto inst = code_item.begin(); ; ++inst) {
+    if (!count_types_ && last_start != buffer_.size()) {
+      // Register the instruction blob.
+      ++instruction_freq_[std::vector<uint8_t>(buffer_.begin() + last_start, buffer_.end())];
+      last_start = buffer_.size();
+    }
+    if (inst == code_item.end()) {
+      break;
+    }
+    if (dump_) {
+      std::cout << std::endl;
+      std::cout << inst->DumpString(nullptr);
+      if (skip_next) {
+        std::cout << " (SKIPPED)";
+      }
+    }
+    if (skip_next) {
+      skip_next = false;
+      continue;
+    }
+    bool is_iget = false;
+    const Instruction::Code opcode = inst->Opcode();
+    Instruction::Code new_opcode = opcode;
+    switch (opcode) {
+      case Instruction::IGET:
+      case Instruction::IGET_WIDE:
+      case Instruction::IGET_OBJECT:
+      case Instruction::IGET_BOOLEAN:
+      case Instruction::IGET_BYTE:
+      case Instruction::IGET_CHAR:
+      case Instruction::IGET_SHORT:
+        is_iget = true;
+        FALLTHROUGH_INTENDED;
+      case Instruction::IPUT:
+      case Instruction::IPUT_WIDE:
+      case Instruction::IPUT_OBJECT:
+      case Instruction::IPUT_BOOLEAN:
+      case Instruction::IPUT_BYTE:
+      case Instruction::IPUT_CHAR:
+      case Instruction::IPUT_SHORT: {
+        const uint32_t dex_field_idx = inst->VRegC_22c();
+        if (Enabled(kExperimentSingleGetSet)) {
+          // Test deduplication improvements from replacing all iget/set with the same opcode.
+          new_opcode = is_iget ? Instruction::IGET : Instruction::IPUT;
+        }
+        CHECK_LT(dex_field_idx, dex_file.NumFieldIds());
+        dex::TypeIndex holder_type = dex_file.GetFieldId(dex_field_idx).class_idx_;
+        uint32_t receiver = inst->VRegB_22c();
+        uint32_t first_arg_reg = code_item.RegistersSize() - code_item.InsSize();
+        uint32_t out_reg = inst->VRegA_22c();
+        if (Enabled(kExperimentInstanceFieldSelf) &&
+            first_arg_reg == receiver &&
+            holder_type == current_class_type) {
+          if (count_types_) {
+            ++current_type.fields_.FindOrAdd(dex_field_idx)->second;
+          } else {
+            uint32_t field_idx = types_[holder_type.index_].fields_.Get(dex_field_idx);
+            ExtendPrefix(&out_reg, &field_idx);
+            CHECK(InstNibbles(new_opcode, {out_reg, field_idx}));
+            continue;
+          }
+        } else if (Enabled(kExperimentInstanceField)) {
+          if (count_types_) {
+            ++current_type.types_.FindOrAdd(holder_type.index_)->second;
+            ++types_[holder_type.index_].fields_.FindOrAdd(dex_field_idx)->second;
+          } else {
+            uint32_t type_idx = current_type.types_.Get(holder_type.index_);
+            uint32_t field_idx = types_[holder_type.index_].fields_.Get(dex_field_idx);
+            ExtendPrefix(&type_idx, &field_idx);
+            CHECK(InstNibbles(new_opcode, {out_reg, receiver, type_idx, field_idx}));
+            continue;
+          }
+        }
+        break;
+      }
+      case Instruction::CONST_STRING:
+      case Instruction::CONST_STRING_JUMBO: {
+        const bool is_jumbo = opcode == Instruction::CONST_STRING_JUMBO;
+        const uint16_t str_idx = is_jumbo ? inst->VRegB_31c() : inst->VRegB_21c();
+        uint32_t out_reg = is_jumbo ? inst->VRegA_31c() : inst->VRegA_21c();
+        if (Enabled(kExperimentString)) {
+          new_opcode = Instruction::CONST_STRING;
+          if (count_types_) {
+            ++current_type.strings_.FindOrAdd(str_idx)->second;
+          } else {
+            uint32_t idx = current_type.strings_.Get(str_idx);
+            ExtendPrefix(&out_reg, &idx);
+            CHECK(InstNibbles(opcode, {out_reg, idx}));
+            continue;
+          }
+        }
+        break;
+      }
+      case Instruction::SGET:
+      case Instruction::SGET_WIDE:
+      case Instruction::SGET_OBJECT:
+      case Instruction::SGET_BOOLEAN:
+      case Instruction::SGET_BYTE:
+      case Instruction::SGET_CHAR:
+      case Instruction::SGET_SHORT:
+      case Instruction::SPUT:
+      case Instruction::SPUT_WIDE:
+      case Instruction::SPUT_OBJECT:
+      case Instruction::SPUT_BOOLEAN:
+      case Instruction::SPUT_BYTE:
+      case Instruction::SPUT_CHAR:
+      case Instruction::SPUT_SHORT: {
+        uint32_t out_reg = inst->VRegA_21c();
+        const uint32_t dex_field_idx = inst->VRegB_21c();
+        CHECK_LT(dex_field_idx, dex_file.NumFieldIds());
+        dex::TypeIndex holder_type = dex_file.GetFieldId(dex_field_idx).class_idx_;
+        if (Enabled(kExperimentStaticField)) {
+          if (holder_type == current_class_type) {
+            if (count_types_) {
+              ++types_[holder_type.index_].fields_.FindOrAdd(dex_field_idx)->second;
+            } else {
+              uint32_t field_idx = types_[holder_type.index_].fields_.Get(dex_field_idx);
+              ExtendPrefix(&out_reg, &field_idx);
+              if (InstNibbles(new_opcode, {out_reg, field_idx})) {
+                continue;
+              }
+            }
+          } else {
+            if (count_types_) {
+              ++types_[current_class_type.index_].types_.FindOrAdd(holder_type.index_)->second;
+              ++types_[holder_type.index_].fields_.FindOrAdd(dex_field_idx)->second;
+            } else {
+              uint32_t type_idx = current_type.types_.Get(holder_type.index_);
+              uint32_t field_idx = types_[holder_type.index_].fields_.Get(dex_field_idx);
+              ExtendPrefix(&type_idx, &field_idx);
+              if (InstNibbles(new_opcode, {out_reg >> 4, out_reg & 0xF, type_idx, field_idx})) {
+                continue;
+              }
+            }
+          }
+        }
+        break;
+      }
+      // Invoke cases.
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_DIRECT:
+      case Instruction::INVOKE_STATIC:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_SUPER: {
+        const uint32_t method_idx = DexMethodIndex(inst.Inst());
+        const DexFile::MethodId& method = dex_file.GetMethodId(method_idx);
+        const dex::TypeIndex receiver_type = method.class_idx_;
+        if (Enabled(kExperimentInvoke)) {
+          if (count_types_) {
+            ++current_type.types_.FindOrAdd(receiver_type.index_)->second;
+            ++types_[receiver_type.index_].methods_.FindOrAdd(method_idx)->second;
+          } else {
+            uint32_t args[6] = {};
+            uint32_t arg_count = inst->GetVarArgs(args);
+
+            bool next_move_result = false;
+            uint32_t dest_reg = 0;
+            auto next = std::next(inst);
+            if (next != code_item.end()) {
+              next_move_result =
+                  next->Opcode() == Instruction::MOVE_RESULT ||
+                  next->Opcode() == Instruction::MOVE_RESULT_WIDE ||
+                  next->Opcode() == Instruction::MOVE_RESULT_OBJECT;
+              if (next_move_result) {
+                dest_reg = next->VRegA_11x();
+              }
+            }
+
+            bool result = false;
+            uint32_t type_idx = current_type.types_.Get(receiver_type.index_);
+            uint32_t local_idx = types_[receiver_type.index_].methods_.Get(method_idx);
+            ExtendPrefix(&type_idx, &local_idx);
+            ExtendPrefix(&dest_reg, &local_idx);
+            if (arg_count == 0) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx});
+            } else if (arg_count == 1) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0]});
+            } else if (arg_count == 2) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
+                                            args[1]});
+            } else if (arg_count == 3) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
+                                            args[1], args[2]});
+            } else if (arg_count == 4) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
+                                            args[1], args[2], args[3]});
+            } else if (arg_count == 5) {
+              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
+                                            args[1], args[2], args[3], args[4]});
+            }
+
+            if (result) {
+              skip_next = next_move_result;
+              continue;
+            }
+          }
+        }
+        break;
+      }
+      case Instruction::IF_EQZ:
+      case Instruction::IF_NEZ: {
+        uint32_t reg = inst->VRegA_21t();
+        int16_t offset = inst->VRegB_21t();
+        if (!count_types_ &&
+            Enabled(kExperimentSmallIf) &&
+            InstNibbles(opcode, {reg, static_cast<uint16_t>(offset)})) {
+          continue;
+        }
+        break;
+      }
+      case Instruction::INSTANCE_OF: {
+        uint32_t type_idx = inst->VRegC_22c();
+        uint32_t in_reg = inst->VRegB_22c();
+        uint32_t out_reg = inst->VRegB_22c();
+        if (count_types_) {
+          ++current_type.types_.FindOrAdd(type_idx)->second;
+        } else {
+          uint32_t local_type = current_type.types_.Get(type_idx);
+          ExtendPrefix(&in_reg, &local_type);
+          CHECK(InstNibbles(new_opcode, {in_reg, out_reg, local_type}));
+          continue;
+        }
+        break;
+      }
+      case Instruction::NEW_ARRAY: {
+        uint32_t len_reg = inst->VRegB_22c();
+        uint32_t type_idx = inst->VRegC_22c();
+        uint32_t out_reg = inst->VRegA_22c();
+        if (count_types_) {
+          ++current_type.types_.FindOrAdd(type_idx)->second;
+        } else {
+          uint32_t local_type = current_type.types_.Get(type_idx);
+          ExtendPrefix(&out_reg, &local_type);
+          CHECK(InstNibbles(new_opcode, {len_reg, out_reg, local_type}));
+          continue;
+        }
+        break;
+      }
+      case Instruction::CONST_CLASS:
+      case Instruction::CHECK_CAST:
+      case Instruction::NEW_INSTANCE: {
+        uint32_t type_idx = inst->VRegB_21c();
+        uint32_t out_reg = inst->VRegA_21c();
+        if (Enabled(kExperimentLocalType)) {
+          if (count_types_) {
+            ++current_type.types_.FindOrAdd(type_idx)->second;
+          } else {
+            bool next_is_init = false;
+            if (opcode == Instruction::NEW_INSTANCE && inst != code_item.end()) {
+              auto next = std::next(inst);
+              if (next->Opcode() == Instruction::INVOKE_DIRECT) {
+                uint32_t args[6] = {};
+                uint32_t arg_count = next->GetVarArgs(args);
+                uint32_t method_idx = DexMethodIndex(next.Inst());
+                if (arg_count == 1u &&
+                    args[0] == out_reg &&
+                    dex_file.GetMethodName(dex_file.GetMethodId(method_idx)) ==
+                        std::string("<init>")) {
+                  next_is_init = true;
+                }
+              }
+            }
+            uint32_t local_type = current_type.types_.Get(type_idx);
+            ExtendPrefix(&out_reg, &local_type);
+            CHECK(InstNibbles(opcode, {out_reg, local_type}));
+            skip_next = next_is_init;
+            continue;
+          }
+        }
+        break;
+      }
+      case Instruction::RETURN:
+      case Instruction::RETURN_OBJECT:
+      case Instruction::RETURN_WIDE:
+      case Instruction::RETURN_VOID: {
+        if (!count_types_ && Enabled(kExperimentReturn)) {
+          if (opcode == Instruction::RETURN_VOID || inst->VRegA_11x() == 0) {
+            if (InstNibbles(opcode, {})) {
+              continue;
+            }
+          }
+        }
+        break;
+      }
+      default:
+        break;
+    }
+    if (!count_types_) {
+      Add(new_opcode, inst.Inst());
+    }
+  }
+  if (dump_) {
+    std::cout << std::endl
+              << "Bytecode size " << code_item.InsnsSizeInBytes() << " -> " << buffer_.size();
+    std::cout << std::endl;
+  }
+}
+
+void InstructionBuilder::Add(Instruction::Code opcode, const Instruction& inst) {
+  const uint8_t* start = reinterpret_cast<const uint8_t*>(&inst);
+  buffer_.push_back(opcode);
+  buffer_.insert(buffer_.end(), start + 1, start + 2 * inst.SizeInCodeUnits());
+}
+
+void InstructionBuilder::ExtendPrefix(uint32_t* value1, uint32_t* value2) {
+  if (*value1 < 16 && *value2 < 16) {
+    return;
+  }
+  if ((*value1 >> 4) == 1 && *value2 < 16) {
+    InstNibbles(0xE5, {});
+    *value1 ^= 1u << 4;
+    return;
+  } else if ((*value2 >> 4) == 1 && *value1 < 16) {
+    InstNibbles(0xE6, {});
+    *value2 ^= 1u << 4;
+    return;
+  }
+  if (*value1 < 256 && *value2 < 256) {
+    // Extend each value by 4 bits.
+    CHECK(InstNibbles(0xE3, {*value1 >> 4, *value2 >> 4}));
+  } else {
+    // Extend each value by 12 bits.
+    CHECK(InstNibbles(0xE4, {
+        (*value1 >> 12) & 0xF,
+        (*value1 >> 8) & 0xF,
+        (*value1 >> 4) & 0xF,
+        (*value2 >> 12) & 0xF,
+        (*value2 >> 8) & 0xF,
+        (*value2 >> 4) & 0xF}));
+  }
+  *value1 &= 0xF;
+  *value2 &= 0XF;
+}
+
+bool InstructionBuilder::InstNibblesAndIndex(uint8_t opcode,
+                                             uint16_t idx,
+                                             const std::vector<uint32_t>& args) {
+  if (!InstNibbles(opcode, args)) {
+    return false;
+  }
+  buffer_.push_back(static_cast<uint8_t>(idx >> 8));
+  buffer_.push_back(static_cast<uint8_t>(idx));
+  return true;
+}
+
+bool InstructionBuilder::InstNibbles(uint8_t opcode, const std::vector<uint32_t>& args) {
+  if (dump_) {
+    std::cout << " ==> " << Instruction::Name(static_cast<Instruction::Code>(opcode)) << " ";
+    for (int v : args) {
+      std::cout << v << ", ";
+    }
+  }
+  for (int v : args) {
+    if (v >= 16) {
+      if (dump_) {
+        std::cout << "(OUT_OF_RANGE)";
+      }
+      return false;
+    }
+  }
+  buffer_.push_back(opcode);
+  for (size_t i = 0; i < args.size(); i += 2) {
+    buffer_.push_back(args[i] << 4);
+    if (i + 1 < args.size()) {
+      buffer_.back() |= args[i + 1];
+    }
+  }
+  while (buffer_.size() % alignment_ != 0) {
+    buffer_.push_back(0);
+  }
+  return true;
+}
+
+}  // namespace dexanalyze
+}  // namespace art
diff --git a/tools/dexanalyze/dexanalyze_bytecode.h b/tools/dexanalyze/dexanalyze_bytecode.h
new file mode 100644
index 0000000..e7c5e7b
--- /dev/null
+++ b/tools/dexanalyze/dexanalyze_bytecode.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TOOLS_DEXANALYZE_DEXANALYZE_BYTECODE_H_
+#define ART_TOOLS_DEXANALYZE_DEXANALYZE_BYTECODE_H_
+
+#include <vector>
+#include <map>
+
+#include "base/safe_map.h"
+#include "dexanalyze_experiments.h"
+#include "dex/code_item_accessors.h"
+
+namespace art {
+namespace dexanalyze {
+
+enum BytecodeExperiment {
+  kExperimentInvoke,
+  kExperimentInstanceField,
+  kExperimentInstanceFieldSelf,
+  kExperimentStaticField,
+  kExperimentLocalType,
+  kExperimentReturn,
+  kExperimentSmallIf,
+  kExperimentString,
+  kExperimentSingleGetSet,
+};
+
+// Maps from global index to local index.
+struct TypeLinkage {
+  // Referenced types.
+  SafeMap<size_t, size_t> types_;
+  // Owned fields.
+  SafeMap<size_t, size_t> fields_;
+  // Owned methods.
+  SafeMap<size_t, size_t> methods_;
+  // Referenced strings.
+  SafeMap<size_t, size_t> strings_;
+};
+
+class InstructionBuilder {
+ public:
+  InstructionBuilder(std::map<size_t, TypeLinkage>& types,
+                     bool count_types,
+                     bool dump,
+                     uint64_t experiments,
+                     std::map<std::vector<uint8_t>, size_t>& instruction_freq);
+  void Process(const DexFile& dex_file,
+               const CodeItemDataAccessor& code_item,
+               dex::TypeIndex current_class_type);
+  void Add(Instruction::Code opcode, const Instruction& inst);
+  bool InstNibblesAndIndex(uint8_t opcode, uint16_t idx, const std::vector<uint32_t>& args);
+  bool InstNibbles(uint8_t opcode, const std::vector<uint32_t>& args);
+  void ExtendPrefix(uint32_t* value1, uint32_t* value2);
+  bool Enabled(BytecodeExperiment experiment) const {
+    return experiments_ & (1u << static_cast<uint64_t>(experiment));
+  }
+
+  size_t alignment_ = 1u;
+  std::vector<uint8_t> buffer_;
+  // Global index -> local index maps.
+  std::map<size_t, TypeLinkage>& types_;
+  uint64_t missing_field_idx_count_ = 0u;
+  uint64_t missing_method_idx_count_ = 0u;
+  const bool count_types_;
+  const bool dump_;
+  uint64_t experiments_ = std::numeric_limits<uint64_t>::max();
+  std::map<std::vector<uint8_t>, size_t>& instruction_freq_;
+};
+
+class NewRegisterInstructions : public Experiment {
+ public:
+  explicit NewRegisterInstructions(uint64_t experiments) : experiments_(experiments) {}
+  void ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files);
+  void Dump(std::ostream& os, uint64_t total_size) const;
+
+ private:
+  uint64_t output_size_ = 0u;
+  uint64_t deduped_size_ = 0u;
+  uint64_t dex_code_bytes_ = 0u;
+  uint64_t missing_field_idx_count_ = 0u;
+  uint64_t missing_method_idx_count_ = 0u;
+  uint64_t experiments_ = std::numeric_limits<uint64_t>::max();
+  std::map<std::vector<uint8_t>, size_t> instruction_freq_;
+};
+
+}  // namespace dexanalyze
+}  // namespace art
+
+#endif  // ART_TOOLS_DEXANALYZE_DEXANALYZE_BYTECODE_H_
diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc
index c5f7e8e..b9a2ede 100644
--- a/tools/dexanalyze/dexanalyze_experiments.cc
+++ b/tools/dexanalyze/dexanalyze_experiments.cc
@@ -32,8 +32,9 @@
 #include "dex/utf-inl.h"
 
 namespace art {
+namespace dexanalyze {
 
-static inline bool IsRange(Instruction::Code code) {
+bool IsRange(Instruction::Code code) {
   return code == Instruction::INVOKE_VIRTUAL_RANGE ||
       code == Instruction::INVOKE_DIRECT_RANGE ||
       code == Instruction::INVOKE_SUPER_RANGE ||
@@ -41,11 +42,11 @@
       code == Instruction::INVOKE_INTERFACE_RANGE;
 }
 
-static inline uint16_t NumberOfArgs(const Instruction& inst) {
+uint16_t NumberOfArgs(const Instruction& inst) {
   return IsRange(inst.Opcode()) ? inst.VRegA_3rc() : inst.VRegA_35c();
 }
 
-static inline uint16_t DexMethodIndex(const Instruction& inst) {
+uint16_t DexMethodIndex(const Instruction& inst) {
   return IsRange(inst.Opcode()) ? inst.VRegB_3rc() : inst.VRegB_35c();
 }
 
@@ -70,7 +71,7 @@
       static_cast<double>(value * 100) / static_cast<double>(max));
 }
 
-static size_t PrefixLen(const std::string& a, const std::string& b) {
+size_t PrefixLen(const std::string& a, const std::string& b) {
   size_t len = 0;
   for (; len < a.length() && len < b.length() && a[len] == b[len]; ++len) {}
   return len;
@@ -608,9 +609,17 @@
   os << "Super same class: " << PercentDivide(same_class_super_, total_super_) << "\n";
   os << "Num strings accessed from code: " << num_string_ids_from_code_ << "\n";
   os << "Avg unique methods accessed per class: "
-     << double(total_unique_method_ids_) / double(num_class_defs_) << "\n";
+     << static_cast<double>(total_unique_method_ids_) / static_cast<double>(num_class_defs_) << "\n";
   os << "Avg unique strings accessed per class: "
-     << double(total_unique_string_ids_) / double(num_class_defs_) << "\n";
+     << static_cast<double>(total_unique_string_ids_) / static_cast<double>(num_class_defs_) << "\n";
+  os << "Avg unique types accessed per class " <<
+        static_cast<double>(total_unique_types_) / static_cast<double>(num_class_defs_) << "\n";
+  os << "Total unique methods accessed per class: "
+     << Percent(total_unique_method_ids_, total_size) << "\n";
+  os << "Total unique strings accessed per class: "
+     << Percent(total_unique_string_ids_, total_size) << "\n";
+  os << "Total unique types accessed per class: "
+     << Percent(total_unique_types_, total_size) << "\n";
   const size_t same_class_total =
       same_class_direct_ +
       same_class_virtual_ +
@@ -632,7 +641,6 @@
   os << "Invokes from code: " << (same_class_total + other_class_total) << "\n";
   os << "Type uses on top types: " << PercentDivide(uses_top_types_, uses_all_types_) << "\n";
   os << "Type uses 1b savings: " << PercentDivide(uses_top_types_, total_size) << "\n";
-  os << "Total unique types accessed per class " << total_unique_types_ << "\n";
   os << "Total Dex code bytes: " << Percent(dex_code_bytes_, total_size) << "\n";
   os << "Total unique code items: " << total_unique_code_items_ << "\n";
   os << "Total Dex size: " << total_size << "\n";
@@ -682,4 +690,5 @@
   os << "Low arg savings: " << Percent(low_arg_total * 2, total_size) << "\n";
 }
 
+}  // namespace dexanalyze
 }  // namespace art
diff --git a/tools/dexanalyze/dexanalyze_experiments.h b/tools/dexanalyze/dexanalyze_experiments.h
index 0e147dd..468b74b 100644
--- a/tools/dexanalyze/dexanalyze_experiments.h
+++ b/tools/dexanalyze/dexanalyze_experiments.h
@@ -24,11 +24,24 @@
 #include <vector>
 
 #include "base/macros.h"
+#include "dex/dex_instruction.h"
 
 namespace art {
 
 class DexFile;
 
+namespace dexanalyze {
+
+bool IsRange(Instruction::Code code);
+
+uint16_t NumberOfArgs(const Instruction& inst);
+
+uint16_t DexMethodIndex(const Instruction& inst);
+
+std::string PercentDivide(uint64_t value, uint64_t max);
+
+size_t PrefixLen(const std::string& a, const std::string& b);
+
 std::string Percent(uint64_t value, uint64_t max);
 
 // An experiment a stateful visitor that runs on dex files. Results are cumulative.
@@ -38,6 +51,8 @@
   virtual void ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files);
   virtual void ProcessDexFile(const DexFile&) {}
   virtual void Dump(std::ostream& os, uint64_t total_size) const = 0;
+
+  bool dump_ = false;
 };
 
 // Analyze string data and strings accessed from code.
@@ -167,6 +182,7 @@
   uint64_t move_result_savings_ = 0u;
 };
 
+}  // namespace dexanalyze
 }  // namespace art
 
 #endif  // ART_TOOLS_DEXANALYZE_DEXANALYZE_EXPERIMENTS_H_