Merge "ART: ChildBlockIterator Implementation"
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 59ed827..7441dac 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -138,7 +138,7 @@
   oatdata_offsets_to_compiled_code_offset_.push_back(offset);
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
@@ -148,48 +148,48 @@
                                const std::vector<uint8_t>& vmap_table,
                                const std::vector<uint8_t>& native_gc_map,
                                const std::vector<uint8_t>* cfi_info)
-    : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
+    : CompiledCode(driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-  mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
-  vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
-  gc_map_(driver.DeduplicateGCMap(native_gc_map)),
-  cfi_info_(driver.DeduplicateCFIInfo(cfi_info)) {
+  mapping_table_(driver->DeduplicateMappingTable(mapping_table)),
+  vmap_table_(driver->DeduplicateVMapTable(vmap_table)),
+  gc_map_(driver->DeduplicateGCMap(native_gc_map)),
+  cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) {
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask)
-    : CompiledCode(&driver, instruction_set, code),
+    : CompiledCode(driver, instruction_set, code),
       frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-      mapping_table_(driver.DeduplicateMappingTable(std::vector<uint8_t>())),
-      vmap_table_(driver.DeduplicateVMapTable(std::vector<uint8_t>())),
-      gc_map_(driver.DeduplicateGCMap(std::vector<uint8_t>())),
+      mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())),
+      vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())),
+      gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())),
       cfi_info_(nullptr) {
 }
 
 // Constructs a CompiledMethod for the Portable compiler.
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::vector<uint8_t>& gc_map,
                                const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
-      fp_spill_mask_(0), gc_map_(driver.DeduplicateGCMap(gc_map)) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
+      fp_spill_mask_(0), gc_map_(driver->DeduplicateGCMap(gc_map)) {
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
       fp_spill_mask_(0) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
-  gc_map_ = driver.DeduplicateGCMap(std::vector<uint8_t>());
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
+  gc_map_ = driver->DeduplicateGCMap(std::vector<uint8_t>());
 }
 
 }  // namespace art
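
Note: with the constructors now taking CompilerDriver*, call sites no longer have to bind a reference from a dereferenced pointer, and the Deduplicate* helpers are reached via driver-> rather than driver. . A minimal before/after sketch against the Portable-JNI constructor above (variable names are illustrative):

    // Before (reference parameter): callers dereferenced their CompilerDriver*.
    //   CompiledMethod* cm = new CompiledMethod(*driver, instruction_set, code, symbol);
    // After (pointer parameter): the pointer flows straight through to CompiledCode.
    CompiledMethod* cm = new CompiledMethod(driver, instruction_set, code, symbol);
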
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 90ae6ee..844b53c 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -102,7 +102,7 @@
 class CompiledMethod : public CompiledCode {
  public:
   // Constructs a CompiledMethod for the non-LLVM compilers.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -114,7 +114,7 @@
                  const std::vector<uint8_t>* cfi_info);
 
   // Constructs a CompiledMethod for the QuickJniCompiler.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -122,11 +122,11 @@
                  const uint32_t fp_spill_mask);
 
   // Constructs a CompiledMethod for the Portable compiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::vector<uint8_t>& gc_map, const std::string& symbol);
 
   // Constructs a CompiledMethod for the Portable JniCompiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::string& symbol);
 
   ~CompiledMethod() {}
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index c88c38e..a832c31 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 #ifdef ART_SEA_IR_MODE
-extern "C" art::CompiledMethod* SeaIrCompileMethod(art::CompilerDriver& driver,
-                                                   const art::DexFile::CodeItem* code_item,
+extern "C" art::CompiledMethod* SeaIrCompileMethod(const art::DexFile::CodeItem* code_item,
                                                    uint32_t access_flags,
                                                    art::InvokeType invoke_type,
                                                    uint16_t class_def_idx,
@@ -38,8 +37,7 @@
 #endif
 
 
-CompiledMethod* Compiler::TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                              const art::DexFile::CodeItem* code_item,
+CompiledMethod* Compiler::TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                               uint32_t access_flags,
                                               art::InvokeType invoke_type,
                                               uint16_t class_def_idx,
@@ -47,13 +45,10 @@
                                               jobject class_loader,
                                               const art::DexFile& dex_file) {
 #ifdef ART_SEA_IR_MODE
-    bool use_sea = Runtime::Current()->IsSeaIRMode();
-    use_sea = use_sea &&
-        (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
+    bool use_sea = (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
     if (use_sea) {
       LOG(INFO) << "Using SEA IR to compile..." << std::endl;
-      return SeaIrCompileMethod(compiler,
-                                code_item,
+      return SeaIrCompileMethod(code_item,
                                 access_flags,
                                 invoke_type,
                                 class_def_idx,
@@ -68,11 +63,11 @@
 
 #ifdef ART_USE_PORTABLE_COMPILER
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -81,45 +76,45 @@
                                                  jobject class_loader,
                                                  const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file);
 
-extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& driver,
+extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver* driver,
                                                std::string const& filename);
 
 
-class LLVMCompiler : public Compiler {
+class LLVMCompiler FINAL : public Compiler {
  public:
-  LLVMCompiler() : Compiler(1000) {}
+  explicit LLVMCompiler(CompilerDriver* driver) : Compiler(driver, 1000) {}
 
-  void Init(CompilerDriver& driver) const {
-    ArtInitCompilerContext(driver);
+  void Init() const OVERRIDE {
+    ArtInitCompilerContext(GetCompilerDriver());
   }
 
-  void UnInit(CompilerDriver& driver) const {
-    ArtUnInitCompilerContext(driver);
+  void UnInit() const OVERRIDE {
+    ArtUnInitCompilerContext(GetCompilerDriver());
   }
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
                           uint32_t method_idx,
                           jobject class_loader,
-                          const DexFile& dex_file) const {
-    CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                                 code_item,
+                          const DexFile& dex_file) const OVERRIDE {
+    CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                  access_flags,
                                                  invoke_type,
                                                  class_def_idx,
                                                  method_idx,
                                                  class_loader,
                                                  dex_file);
-    if (method != nullptr) return method;
+    if (method != nullptr) {
+      return method;
+    }
 
-    return ArtCompileMethod(compiler,
+    return ArtCompileMethod(GetCompilerDriver(),
                             code_item,
                             access_flags,
                             invoke_type,
@@ -129,11 +124,10 @@
                             dex_file);
   }
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const {
-    return ArtLLVMJniCompileMethod(driver, access_flags, method_idx, dex_file);
+                             const DexFile& dex_file) const OVERRIDE {
+    return ArtLLVMJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
   }
 
   uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -182,17 +176,17 @@
 };
 #endif
 
-Compiler* Compiler::Create(Compiler::Kind kind) {
+Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) {
   switch (kind) {
     case kQuick:
-      return new QuickCompiler();
+      return new QuickCompiler(driver);
       break;
     case kOptimizing:
-      return new OptimizingCompiler();
+      return new OptimizingCompiler(driver);
       break;
     case kPortable:
 #ifdef ART_USE_PORTABLE_COMPILER
-      return new LLVMCompiler();
+      return new LLVMCompiler(driver);
 #else
       LOG(FATAL) << "Portable compiler not compiled";
 #endif
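
With the driver captured at construction time, a caller wires the pieces together once instead of threading a CompilerDriver& through every virtual call. A sketch of the resulting usage (construction of the CompilerDriver itself is elided and hypothetical):

    // Sketch: the factory now binds the Compiler to its driver up front.
    Compiler* compiler = Compiler::Create(driver, Compiler::kQuick);
    compiler->Init();  // No CompilerDriver& parameter anymore.
    CompiledMethod* method = compiler->Compile(code_item, access_flags, invoke_type,
                                               class_def_idx, method_idx, class_loader,
                                               dex_file);
    compiler->UnInit();
    delete compiler;
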
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 2357297..4caebf3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -41,18 +41,13 @@
     kPortable
   };
 
-  explicit Compiler(uint64_t warning)
-      : maximum_compilation_time_before_warning_(warning) {
-  }
+  static Compiler* Create(CompilerDriver* driver, Kind kind);
 
-  static Compiler* Create(Kind kind);
+  virtual void Init() const = 0;
 
-  virtual void Init(CompilerDriver& driver) const = 0;
+  virtual void UnInit() const = 0;
 
-  virtual void UnInit(CompilerDriver& driver) const = 0;
-
-  virtual CompiledMethod* Compile(CompilerDriver& driver,
-                                  const DexFile::CodeItem* code_item,
+  virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                                   uint32_t access_flags,
                                   InvokeType invoke_type,
                                   uint16_t class_def_idx,
@@ -60,8 +55,7 @@
                                   jobject class_loader,
                                   const DexFile& dex_file) const = 0;
 
-  static CompiledMethod* TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                             const art::DexFile::CodeItem* code_item,
+  static CompiledMethod* TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                              uint32_t access_flags,
                                              art::InvokeType invoke_type,
                                              uint16_t class_def_idx,
@@ -69,8 +63,7 @@
                                              jobject class_loader,
                                              const art::DexFile& dex_file);
 
-  virtual CompiledMethod* JniCompile(CompilerDriver& driver,
-                                     uint32_t access_flags,
+  virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
@@ -81,11 +74,10 @@
                         OatWriter* oat_writer,
                         const std::vector<const art::DexFile*>& dex_files,
                         const std::string& android_root,
-                        bool is_host, const CompilerDriver& driver) const
+                        bool is_host) const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  virtual Backend* GetCodeGenerator(CompilationUnit* cu,
-                                    void* compilation_unit) const = 0;
+  virtual Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const = 0;
 
   uint64_t GetMaximumCompilationTimeBeforeWarning() const {
     return maximum_compilation_time_before_warning_;
@@ -117,7 +109,17 @@
     return nullptr;
   }
 
+ protected:
+  explicit Compiler(CompilerDriver* driver, uint64_t warning) :
+      driver_(driver), maximum_compilation_time_before_warning_(warning) {
+  }
+
+  CompilerDriver* GetCompilerDriver() const {
+    return driver_;
+  }
+
  private:
+  CompilerDriver* const driver_;
   const uint64_t maximum_compilation_time_before_warning_;
 
   DISALLOW_COPY_AND_ASSIGN(Compiler);
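
The protected constructor plus the GetCompilerDriver() accessor is the pattern every backend now follows (QuickCompiler and LLVMCompiler are the in-tree instances). A minimal sketch of a conforming subclass (FakeCompiler and its warning threshold are hypothetical):

    class FakeCompiler FINAL : public Compiler {
     public:
      explicit FakeCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}

      void Init() const OVERRIDE {
        CompilerDriver* driver = GetCompilerDriver();  // Supplied by the base class.
        // ... backend-specific setup using driver ...
      }
      // ... remaining pure-virtual overrides elided ...
    };
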
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 1237e70..188ce6f 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -22,9 +22,9 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver& driver,
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver* driver,
                                                       const art::DexFile::CodeItem* code_item,
                                                       uint32_t access_flags,
                                                       art::InvokeType invoke_type,
@@ -33,40 +33,40 @@
                                                       jobject class_loader,
                                                       const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* driver,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file);
 
 // Hack for CFI CIE initialization
 extern std::vector<uint8_t>* X86CFIInitialization();
 
-void QuickCompiler::Init(CompilerDriver& driver) const {
-  ArtInitQuickCompilerContext(driver);
+void QuickCompiler::Init() const {
+  ArtInitQuickCompilerContext(GetCompilerDriver());
 }
 
-void QuickCompiler::UnInit(CompilerDriver& driver) const {
-  ArtUnInitQuickCompilerContext(driver);
+void QuickCompiler::UnInit() const {
+  ArtUnInitQuickCompilerContext(GetCompilerDriver());
 }
 
-CompiledMethod* QuickCompiler::Compile(CompilerDriver& driver,
-                                      const DexFile::CodeItem* code_item,
-                                      uint32_t access_flags,
-                                      InvokeType invoke_type,
-                                      uint16_t class_def_idx,
-                                      uint32_t method_idx,
-                                      jobject class_loader,
-                                      const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                               code_item,
+CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
+                                       uint32_t access_flags,
+                                       InvokeType invoke_type,
+                                       uint16_t class_def_idx,
+                                       uint32_t method_idx,
+                                       jobject class_loader,
+                                       const DexFile& dex_file) const {
+  CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                access_flags,
                                                invoke_type,
                                                class_def_idx,
                                                method_idx,
                                                class_loader,
                                                dex_file);
-  if (method != nullptr) return method;
+  if (method != nullptr) {
+    return method;
+  }
 
-  return ArtQuickCompileMethod(driver,
+  return ArtQuickCompileMethod(GetCompilerDriver(),
                                code_item,
                                access_flags,
                                invoke_type,
@@ -76,11 +76,10 @@
                                dex_file);
 }
 
-CompiledMethod* QuickCompiler::JniCompile(CompilerDriver& driver,
-                                          uint32_t access_flags,
+CompiledMethod* QuickCompiler::JniCompile(uint32_t access_flags,
                                           uint32_t method_idx,
                                           const DexFile& dex_file) const {
-  return ArtQuickJniCompileMethod(driver, access_flags, method_idx, dex_file);
+  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
 }
 
 uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -88,11 +87,12 @@
 }
 
 bool QuickCompiler::WriteElf(art::File* file,
-                            OatWriter* oat_writer,
-                            const std::vector<const art::DexFile*>& dex_files,
-                            const std::string& android_root,
-                            bool is_host, const CompilerDriver& driver) const {
-  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host, driver);
+                             OatWriter* oat_writer,
+                             const std::vector<const art::DexFile*>& dex_files,
+                             const std::string& android_root,
+                             bool is_host) const {
+  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host,
+                                     *GetCompilerDriver());
 }
 
 Backend* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
@@ -134,22 +134,21 @@
   return nullptr;
 }
 
-CompiledMethod* OptimizingCompiler::Compile(CompilerDriver& driver,
-                                            const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
                                             uint16_t class_def_idx,
                                             uint32_t method_idx,
                                             jobject class_loader,
                                             const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
-  if (method != nullptr) return method;
+  CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                                      method_idx, class_loader, dex_file);
+  if (method != nullptr) {
+    return method;
+  }
 
-  return QuickCompiler::Compile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
+  return QuickCompiler::Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
+                                class_loader, dex_file);
 }
 
 }  // namespace art
diff --git a/compiler/compilers.h b/compiler/compilers.h
index 255dd23..3ca78c9 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -23,14 +23,13 @@
 
 class QuickCompiler : public Compiler {
  public:
-  QuickCompiler() : Compiler(100) {}
+  explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}
 
-  void Init(CompilerDriver& driver) const OVERRIDE;
+  void Init() const OVERRIDE;
 
-  void UnInit(CompilerDriver& driver) const OVERRIDE;
+  void UnInit() const OVERRIDE;
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -38,8 +37,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE;
 
@@ -50,7 +48,7 @@
                 OatWriter* oat_writer,
                 const std::vector<const art::DexFile*>& dex_files,
                 const std::string& android_root,
-                bool is_host, const CompilerDriver& driver) const
+                bool is_host) const
     OVERRIDE
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -73,12 +71,11 @@
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
 };
 
-class OptimizingCompiler : public QuickCompiler {
+class OptimizingCompiler FINAL : public QuickCompiler {
  public:
-  OptimizingCompiler() { }
+  explicit OptimizingCompiler(CompilerDriver* driver) : QuickCompiler(driver) { }
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -86,8 +83,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* TryCompile(CompilerDriver& driver,
-                             const DexFile::CodeItem* code_item,
+  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
                              uint32_t access_flags,
                              InvokeType invoke_type,
                              uint16_t class_def_idx,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 1a9379a..5e13722 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -30,12 +30,12 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
 /* Default optimizer/debug setting for the compiler. */
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 2f17e08..ed7e1f5 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -925,11 +925,17 @@
 int MIRGraph::AddNewSReg(int v_reg) {
   // Compiler temps always have a subscript of 0
   int subscript = (v_reg < 0) ? 0 : ++ssa_last_defs_[v_reg];
-  int ssa_reg = GetNumSSARegs();
+  uint32_t ssa_reg = GetNumSSARegs();
   SetNumSSARegs(ssa_reg + 1);
   ssa_base_vregs_->Insert(v_reg);
   ssa_subscripts_->Insert(subscript);
   DCHECK_EQ(ssa_base_vregs_->Size(), ssa_subscripts_->Size());
+  // If we are expanding very late, update use counts too.
+  if (ssa_reg > 0 && use_counts_.Size() == ssa_reg) {
+    // Need to expand the counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
   return ssa_reg;
 }
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5c1bdf4..5cc994f 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -286,10 +286,6 @@
         reg_location_[ssa_reg_high].high_word = 1;
         reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
         reg_location_[ssa_reg_high].wide = true;
-
-        // A new SSA needs new use counts.
-        use_counts_.Insert(0);
-        raw_use_counts_.Insert(0);
       }
 
       num_non_special_compiler_temps_++;
@@ -302,10 +298,6 @@
     reg_location_[ssa_reg_low] = temp_loc;
     reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
     reg_location_[ssa_reg_low].wide = wide;
-
-    // A new SSA needs new use counts.
-    use_counts_.Insert(0);
-    raw_use_counts_.Insert(0);
   }
 
   compiler_temps_.Insert(compiler_temp);
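
Taken together with the mir_dataflow.cc hunk above, this moves growth of the use-count arrays into AddNewSReg itself: the two compiler-temp call sites no longer each append their own zero entries, so the parallel arrays cannot drift apart. A self-contained sketch of the invariant, with std::vector standing in for ART's GrowableArray:

    #include <cassert>
    #include <vector>

    // Sketch: SSA bookkeeping where every parallel table grows in one place.
    struct SsaTables {
      std::vector<int> base_vregs;   // parallels ssa_base_vregs_
      std::vector<int> use_counts;   // parallels use_counts_ / raw_use_counts_

      int AddNewSReg(int v_reg) {
        int ssa_reg = static_cast<int>(base_vregs.size());
        base_vregs.push_back(v_reg);
        use_counts.push_back(0);  // Zero-initialized here, never by callers.
        assert(base_vregs.size() == use_counts.size());
        return ssa_reg;
      }
    };
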
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index cac766d..a895e6e 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1213,7 +1213,7 @@
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
   CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  data_offset_ = RoundUp(starting_offset, 4);
   int32_t offset_adjustment;
   AssignDataOffsets();
 
@@ -1596,7 +1596,7 @@
         LOG(FATAL) << "Assembler error - too many retries";
       }
       starting_offset += offset_adjustment;
-      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      data_offset_ = RoundUp(starting_offset, 4);
       AssignDataOffsets();
     }
   }
@@ -1609,7 +1609,7 @@
   write_pos = EncodeLIRs(write_pos, first_lir_insn_);
   DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+  DCHECK_EQ(data_offset_, RoundUp(code_buffer_.size(), 4));
 
   // Install literals
   InstallLiteralPools();
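
RoundUp(starting_offset, 4) is the named equivalent of the (x + 0x3) & ~0x3 mask trick it replaces; for any power-of-two alignment the two compute the same value. A sketch of that equivalence (RoundUpPow2 is a stand-in for ART's RoundUp utility):

    // (x + n - 1) & ~(n - 1) rounds x up to the next multiple of n when n is a
    // power of two.
    constexpr size_t RoundUpPow2(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);
    }
    static_assert(RoundUpPow2(13, 4) == 16, "13 rounds up to 16");
    static_assert(RoundUpPow2(16, 4) == 16, "aligned values are unchanged");
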
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 163c0fe..d3477c9 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -360,6 +360,22 @@
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
       Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      //
+      // This check is done before the callee saves are spilled so that the
+      // spills themselves cannot overflow.  It uses r12, which is never a
+      // callee-save register.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      Load32Disp(rs_r12, 0, rs_r12);
+      MarkPossibleStackOverflowException();
     }
   }
   /* Spill core callee saves */
@@ -418,17 +434,8 @@
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
       }
     } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
+      // Implicit stack overflow check has already been done.  Just make room on the
+      // stack for the frame now.
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
   } else {
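
The probe itself is unchanged (sub r12, sp, #reserved; ldr r12, [r12]); what moved is when it runs: before the callee saves are spilled, so a fault cannot occur after registers have already been pushed. Conceptually the check is a single read below sp, and the resulting segmentation fault in the redzone is what the runtime reports as a stack overflow. A C++ model of the idea (ProbeStackRedzone is a hypothetical helper, not the emitted code):

    #include <cstddef>

    inline void ProbeStackRedzone(const char* stack_pointer, size_t reserved_bytes) {
      // Touch the lowest address the new frame could need.  If it falls in the
      // protected redzone, the load faults, and the fault is reported as a
      // stack overflow rather than silently corrupting memory.
      volatile const char* probe = stack_pointer - reserved_bytes;
      (void)*probe;
    }
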
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
new file mode 100644
index 0000000..c6d6295
--- /dev/null
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+/*
+ * Runtime register usage conventions.
+ *
+ * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
+ *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
+ *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
+ *        registers.
+ * r0-r1: As in C/C++, r0 is the 32-bit return register and r0/r1 the 64-bit pair.
+ * r4   : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r5   : Callee save (promotion target)
+ * r6   : Callee save (promotion target)
+ * r7   : Callee save (promotion target)
+ * r8   : Callee save (promotion target)
+ * r9   : (rARM_SELF) is reserved (pointer to thread-local storage)
+ * r10  : Callee save (promotion target)
+ * r11  : Callee save (promotion target)
+ * r12  : Scratch, may be trashed by linkage stubs
+ * r13  : (sp) is reserved
+ * r14  : (lr) is reserved
+ * r15  : (pc) is reserved
+ *
+ * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
+ * 7 core registers that can be used for promotion
+ *
+ * Floating point registers
+ * s0-s31
+ * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ * s16-s31 (d8-d15) preserved across C calls
+ * s0-s15 (d0-d7) trashed across C calls
+ *
+ * s0-s15/d0-d7 used as codegen temp/scratch
+ * s16-s31/d8-d15 can be used for promotion.
+ *
+ * Calling convention
+ *     o On a call to a Dalvik method, pass target's Method* in r0
+ *     o r1-r3 will be used for up to the first 3 words of arguments
+ *     o Arguments past the first 3 words will be placed in appropriate
+ *       out slots by the caller.
+ *     o If a 64-bit argument would span the register/memory argument
+ *       boundary, it will instead be fully passed in the frame.
+ *     o Maintain a 16-byte stack alignment
+ *
+ *  Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1]              |  {Note: resides in caller's frame}
+ * |       .                |
+ * | IN[0]                  |
+ * | caller's Method*       |
+ * +========================+  {Note: start of callee's frame}
+ * | spill region           |  {variable sized - will include lr if non-leaf.}
+ * +------------------------+
+ * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
+ * +------------------------+
+ * | V[locals-1]            |
+ * | V[locals-2]            |
+ * |      .                 |
+ * |      .                 |
+ * | V[1]                   |
+ * | V[0]                   |
+ * +------------------------+
+ * |  0 to 3 words padding  |
+ * +------------------------+
+ * | OUT[outs-1]            |
+ * | OUT[outs-2]            |
+ * |       .                |
+ * | OUT[0]                 |
+ * | cur_method*            | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
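+// Illustrative arithmetic for the diagram above (numbers are hypothetical, not
+// from the original header): a 16-byte spill region, 12 bytes of locals,
+// 8 bytes of outs and the 4-byte cur_method* slot total 40 bytes, so two words
+// of padding are inserted to reach the required 16-byte alignment (48 bytes).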
+
+// First FP callee save.
+#define ARM_FP_CALLEE_SAVE_BASE 16
+
+enum ArmResourceEncodingPos {
+  kArmGPReg0   = 0,
+  kArmRegSP    = 13,
+  kArmRegLR    = 14,
+  kArmRegPC    = 15,
+  kArmFPReg0   = 16,
+  kArmFPReg16  = 32,
+  kArmRegEnd   = 48,
+};
+
+#define ENCODE_ARM_REG_LIST(N)      (static_cast<uint64_t>(N))
+#define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
+#define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
+#define ENCODE_ARM_REG_PC           (1ULL << kArmRegPC)
+#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16)
+
+enum ArmNativeRegisterPool {
+  r0           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
+  r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
+  r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
+  r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
+  rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+  r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
+  r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
+  r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
+  r8           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
+  rARM_SELF    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
+  r10          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
+  r11          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
+  r12          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
+  r13sp        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
+  rARM_SP      = r13sp,
+  r14lr        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
+  rARM_LR      = r14lr,
+  r15pc        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
+  rARM_PC      = r15pc,
+
+  fr0          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
+  fr1          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
+  fr2          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
+  fr3          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
+  fr4          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
+  fr5          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
+  fr6          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
+  fr7          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
+  fr8          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
+  fr9          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
+  fr10         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
+  fr11         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
+  fr12         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
+  fr13         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
+  fr14         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
+  fr15         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
+  fr16         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
+  fr17         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
+  fr18         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
+  fr19         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
+  fr20         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
+  fr21         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
+  fr22         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
+  fr23         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
+  fr24         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
+  fr25         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
+  fr26         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
+  fr27         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
+  fr28         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
+  fr29         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
+  fr30         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
+  fr31         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
+
+  dr0          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  dr1          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
+  dr2          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  dr3          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
+  dr4          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  dr5          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
+  dr6          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  dr7          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
+  dr8          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  dr9          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
+  dr10         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  dr11         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
+  dr12         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  dr13         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
+  dr14         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+  dr15         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
+#if 0
+  // Enable when def/use tracking and the runtime can handle these.
+  dr16         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  dr17         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
+  dr18         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  dr19         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
+  dr20         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  dr21         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
+  dr22         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  dr23         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
+  dr24         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  dr25         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
+  dr26         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  dr27         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
+  dr28         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  dr29         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
+  dr30         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
+  dr31         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
+#endif
+};
+
+constexpr RegStorage rs_r0(RegStorage::kValid | r0);
+constexpr RegStorage rs_r1(RegStorage::kValid | r1);
+constexpr RegStorage rs_r2(RegStorage::kValid | r2);
+constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
+constexpr RegStorage rs_r5(RegStorage::kValid | r5);
+constexpr RegStorage rs_r6(RegStorage::kValid | r6);
+constexpr RegStorage rs_r7(RegStorage::kValid | r7);
+constexpr RegStorage rs_r8(RegStorage::kValid | r8);
+constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF);
+constexpr RegStorage rs_r10(RegStorage::kValid | r10);
+constexpr RegStorage rs_r11(RegStorage::kValid | r11);
+constexpr RegStorage rs_r12(RegStorage::kValid | r12);
+constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp);
+constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP);
+constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr);
+constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR);
+constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc);
+constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC);
+constexpr RegStorage rs_invalid(RegStorage::kInvalid);
+
+constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
+constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
+constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
+constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
+constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
+constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
+constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
+constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
+constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
+constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
+constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
+constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
+constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
+constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
+constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
+constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
+constexpr RegStorage rs_fr16(RegStorage::kValid | fr16);
+constexpr RegStorage rs_fr17(RegStorage::kValid | fr17);
+constexpr RegStorage rs_fr18(RegStorage::kValid | fr18);
+constexpr RegStorage rs_fr19(RegStorage::kValid | fr19);
+constexpr RegStorage rs_fr20(RegStorage::kValid | fr20);
+constexpr RegStorage rs_fr21(RegStorage::kValid | fr21);
+constexpr RegStorage rs_fr22(RegStorage::kValid | fr22);
+constexpr RegStorage rs_fr23(RegStorage::kValid | fr23);
+constexpr RegStorage rs_fr24(RegStorage::kValid | fr24);
+constexpr RegStorage rs_fr25(RegStorage::kValid | fr25);
+constexpr RegStorage rs_fr26(RegStorage::kValid | fr26);
+constexpr RegStorage rs_fr27(RegStorage::kValid | fr27);
+constexpr RegStorage rs_fr28(RegStorage::kValid | fr28);
+constexpr RegStorage rs_fr29(RegStorage::kValid | fr29);
+constexpr RegStorage rs_fr30(RegStorage::kValid | fr30);
+constexpr RegStorage rs_fr31(RegStorage::kValid | fr31);
+
+constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
+constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
+constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
+constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
+constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
+constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
+constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
+constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
+constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
+constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
+constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
+constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
+constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
+constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
+constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
+constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
+#if 0
+constexpr RegStorage rs_dr16(RegStorage::kValid | dr16);
+constexpr RegStorage rs_dr17(RegStorage::kValid | dr17);
+constexpr RegStorage rs_dr18(RegStorage::kValid | dr18);
+constexpr RegStorage rs_dr19(RegStorage::kValid | dr19);
+constexpr RegStorage rs_dr20(RegStorage::kValid | dr20);
+constexpr RegStorage rs_dr21(RegStorage::kValid | dr21);
+constexpr RegStorage rs_dr22(RegStorage::kValid | dr22);
+constexpr RegStorage rs_dr23(RegStorage::kValid | dr23);
+constexpr RegStorage rs_dr24(RegStorage::kValid | dr24);
+constexpr RegStorage rs_dr25(RegStorage::kValid | dr25);
+constexpr RegStorage rs_dr26(RegStorage::kValid | dr26);
+constexpr RegStorage rs_dr27(RegStorage::kValid | dr27);
+constexpr RegStorage rs_dr28(RegStorage::kValid | dr28);
+constexpr RegStorage rs_dr29(RegStorage::kValid | dr29);
+constexpr RegStorage rs_dr30(RegStorage::kValid | dr30);
+constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
+#endif
+
+// RegisterLocation templates return values (r0, or r0/r1).
+const RegLocation arm_loc_c_return
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_double
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
+
+enum ArmShiftEncodings {
+  kArmLsl = 0x0,
+  kArmLsr = 0x1,
+  kArmAsr = 0x2,
+  kArmRor = 0x3
+};
+
+/*
+ * The following enum defines the list of Thumb instructions supported by the
+ * assembler. Their corresponding EncodingMap positions will be defined in
+ * Assemble.cc.
+ */
+enum ArmOpcode {
+  kArmFirst = 0,
+  kArm16BitData = kArmFirst,  // DATA   [0] rd[15..0].
+  kThumbAdcRR,       // adc   [0100000101] rm[5..3] rd[2..0].
+  kThumbAddRRI3,     // add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0].
+  kThumbAddRI8,      // add(2)  [00110] rd[10..8] imm_8[7..0].
+  kThumbAddRRR,      // add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0].
+  kThumbAddRRLH,     // add(4)  [01000100] H12[01] rm[5..3] rd[2..0].
+  kThumbAddRRHL,     // add(4)  [01001000] H12[10] rm[5..3] rd[2..0].
+  kThumbAddRRHH,     // add(4)  [01001100] H12[11] rm[5..3] rd[2..0].
+  kThumbAddPcRel,    // add(5)  [10100] rd[10..8] imm_8[7..0].
+  kThumbAddSpRel,    // add(6)  [10101] rd[10..8] imm_8[7..0].
+  kThumbAddSpI7,     // add(7)  [101100000] imm_7[6..0].
+  kThumbAndRR,       // and   [0100000000] rm[5..3] rd[2..0].
+  kThumbAsrRRI5,     // asr(1)  [00010] imm_5[10..6] rm[5..3] rd[2..0].
+  kThumbAsrRR,       // asr(2)  [0100000100] rs[5..3] rd[2..0].
+  kThumbBCond,       // b(1)  [1101] cond[11..8] offset_8[7..0].
+  kThumbBUncond,     // b(2)  [11100] offset_11[10..0].
+  kThumbBicRR,       // bic   [0100001110] rm[5..3] rd[2..0].
+  kThumbBkpt,        // bkpt  [10111110] imm_8[7..0].
+  kThumbBlx1,        // blx(1)  [111] H[10] offset_11[10..0].
+  kThumbBlx2,        // blx(1)  [111] H[01] offset_11[10..0].
+  kThumbBl1,         // bl(1)   [111] H[10] offset_11[10..0].
+  kThumbBl2,         // bl(1)   [111] H[11] offset_11[10..0].
+  kThumbBlxR,        // blx(2)  [010001111] rm[6..3] [000].
+  kThumbBx,          // bx    [010001110] H2[6..6] rm[5..3] SBZ[000].
+  kThumbCmnRR,       // cmn   [0100001011] rm[5..3] rd[2..0].
+  kThumbCmpRI8,      // cmp(1)  [00101] rn[10..8] imm_8[7..0].
+  kThumbCmpRR,       // cmp(2)  [0100001010] rm[5..3] rd[2..0].
+  kThumbCmpLH,       // cmp(3)  [01000101] H12[01] rm[5..3] rd[2..0].
+  kThumbCmpHL,       // cmp(3)  [01000110] H12[10] rm[5..3] rd[2..0].
+  kThumbCmpHH,       // cmp(3)  [01000111] H12[11] rm[5..3] rd[2..0].
+  kThumbEorRR,       // eor   [0100000001] rm[5..3] rd[2..0].
+  kThumbLdmia,       // ldmia   [11001] rn[10..8] reglist [7..0].
+  kThumbLdrRRI5,     // ldr(1)  [01101] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbLdrRRR,      // ldr(2)  [0101100] rm[8..6] rn[5..3] rd[2..0].
+  kThumbLdrPcRel,    // ldr(3)  [01001] rd[10..8] imm_8[7..0].
+  kThumbLdrSpRel,    // ldr(4)  [10011] rd[10..8] imm_8[7..0].
+  kThumbLdrbRRI5,    // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbLdrbRRR,     // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0].
+  kThumbLdrhRRI5,    // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbLdrhRRR,     // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0].
+  kThumbLdrsbRRR,    // ldrsb   [0101011] rm[8..6] rn[5..3] rd[2..0].
+  kThumbLdrshRRR,    // ldrsh   [0101111] rm[8..6] rn[5..3] rd[2..0].
+  kThumbLslRRI5,     // lsl(1)  [00000] imm_5[10..6] rm[5..3] rd[2..0].
+  kThumbLslRR,       // lsl(2)  [0100000010] rs[5..3] rd[2..0].
+  kThumbLsrRRI5,     // lsr(1)  [00001] imm_5[10..6] rm[5..3] rd[2..0].
+  kThumbLsrRR,       // lsr(2)  [0100000011] rs[5..3] rd[2..0].
+  kThumbMovImm,      // mov(1)  [00100] rd[10..8] imm_8[7..0].
+  kThumbMovRR,       // mov(2)  [0001110000] rn[5..3] rd[2..0].
+  kThumbMovRR_H2H,   // mov(3)  [01000111] H12[11] rm[5..3] rd[2..0].
+  kThumbMovRR_H2L,   // mov(3)  [01000110] H12[01] rm[5..3] rd[2..0].
+  kThumbMovRR_L2H,   // mov(3)  [01000101] H12[10] rm[5..3] rd[2..0].
+  kThumbMul,         // mul   [0100001101] rm[5..3] rd[2..0].
+  kThumbMvn,         // mvn   [0100001111] rm[5..3] rd[2..0].
+  kThumbNeg,         // neg   [0100001001] rm[5..3] rd[2..0].
+  kThumbOrr,         // orr   [0100001100] rm[5..3] rd[2..0].
+  kThumbPop,         // pop   [1011110] r[8..8] rl[7..0].
+  kThumbPush,        // push  [1011010] r[8..8] rl[7..0].
+  kThumbRev,         // rev   [1011101000] rm[5..3] rd[2..0].
+  kThumbRevsh,       // revsh   [1011101011] rm[5..3] rd[2..0].
+  kThumbRorRR,       // ror   [0100000111] rs[5..3] rd[2..0].
+  kThumbSbc,         // sbc   [0100000110] rm[5..3] rd[2..0].
+  kThumbStmia,       // stmia   [11000] rn[10..8] reglist [7.. 0].
+  kThumbStrRRI5,     // str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbStrRRR,      // str(2)  [0101000] rm[8..6] rn[5..3] rd[2..0].
+  kThumbStrSpRel,    // str(3)  [10010] rd[10..8] imm_8[7..0].
+  kThumbStrbRRI5,    // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbStrbRRR,     // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0].
+  kThumbStrhRRI5,    // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0].
+  kThumbStrhRRR,     // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0].
+  kThumbSubRRI3,     // sub(1)  [0001111] imm_3[8..6] rn[5..3] rd[2..0].
+  kThumbSubRI8,      // sub(2)  [00111] rd[10..8] imm_8[7..0].
+  kThumbSubRRR,      // sub(3)  [0001101] rm[8..6] rn[5..3] rd[2..0].
+  kThumbSubSpI7,     // sub(4)  [101100001] imm_7[6..0].
+  kThumbSwi,         // swi   [11011111] imm_8[7..0].
+  kThumbTst,         // tst   [0100001000] rm[5..3] rn[2..0].
+  kThumb2Vldrs,      // vldr low  sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0].
+  kThumb2Vldrd,      // vldr low  dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0].
+  kThumb2Vmuls,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0].
+  kThumb2Vmuld,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0].
+  kThumb2Vstrs,      // vstr low  sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0].
+  kThumb2Vstrd,      // vstr low  dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0].
+  kThumb2Vsubs,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100100] rm[3..0].
+  kThumb2Vsubd,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110100] rm[3..0].
+  kThumb2Vadds,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0].
+  kThumb2Vaddd,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
+  kThumb2Vdivs,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
+  kThumb2Vdivd,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
+  kThumb2VmlaF64,    // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
+  kThumb2VcvtIF,     // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
+  kThumb2VcvtFI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
+  kThumb2VcvtDI,     // vcvt.S32.F64 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
+  kThumb2VcvtFd,     // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
+  kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
+  kThumb2VcvtF64S32,  // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
+  kThumb2VcvtF64U32,  // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
+  kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
+  kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
+  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
+  kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
+  kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2LdrRRI12,   // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2StrRRI8Predec,  // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
+  kThumb2LdrRRI8Predec,  // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
+  kThumb2Cbnz,       // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
+  kThumb2Cbz,        // cbz rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
+  kThumb2AddRRI12,   // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
+  kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] [101001] M [0] vm[3..0].
+  kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] [101101] M [0] vm[3..0].
+  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
+  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
+  kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
+  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2MvnI8M,     // mvn(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
+  kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
+  kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
+  kThumb2Sbfx,       // sbfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
+  kThumb2LdrRRR,     // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2LdrhRRR,    // ldrh rt,[rn,rm,LSL #imm] [111110000011] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2LdrshRRR,   // ldrsh rt,[rn,rm,LSL #imm] [111110010011] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2LdrbRRR,    // ldrb rt,[rn,rm,LSL #imm] [111110000001] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2LdrsbRRR,   // ldrsb rt,[rn,rm,LSL #imm] [111110010001] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2StrRRR,     // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2StrhRRR,    // strh rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2StrbRRR,    // strb rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
+  kThumb2LdrhRRI12,  // ldrh rt,[rn,#imm12] [111110001011] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2LdrshRRI12,  // ldrsh rt,[rn,#imm12] [111110011011] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2LdrbRRI12,  // ldrb rt,[rn,#imm12] [111110001001] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2LdrsbRRI12,  // ldrsb rt,[rn,#imm12] [111110011001] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2StrhRRI12,  // strh rt,[rn,#imm12] [111110001010] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rn[19..16] rt[15..12] imm12[11..0].
+  kThumb2Pop,        // pop   [1110100010111101] list[15-0].
+  kThumb2Push,       // push  [1110100100101101] list[15-0].
+  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2CmnRR,      // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
+  kThumb2EorRRR,     // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2MulRRR,     // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
+  kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
+  kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
+  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2NegRR,      // actually rsub rd, rn, #0.
+  kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
+  kThumb2LslRRR,     // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2LsrRRR,     // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2AsrRRR,     // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2RorRRR,     // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2LslRRI5,    // lsl [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [00] rm[3..0].
+  kThumb2LsrRRI5,    // lsr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [01] rm[3..0].
+  kThumb2AsrRRI5,    // asr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [10] rm[3..0].
+  kThumb2RorRRI5,    // ror [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [11] rm[3..0].
+  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SbcRRI8M,   // sbc rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] [1000] rm[3..0].
+  kThumb2RevshRR,    // revsh [111110101001] rm[19..16] [1111] rd[11..8] [1011] rm[3..0].
+  kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
+  kThumb2Fmstat,     // fmstat [11101110111100011111101000010000].
+  kThumb2Vcmpd,      // vcmp [111011101] D [110100] rd[15-12] [1011] E [1] M [0] rm[3-0].
+  kThumb2Vcmps,      // vcmp [111011101] D [110100] rd[15-12] [1010] E [1] M [0] rm[3-0].
+  kThumb2LdrPcRel12,  // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0].
+  kThumb2BCond,      // b<c> [11110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0].
+  kThumb2Fmrs,       // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000].
+  kThumb2Fmsr,       // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000].
+  kThumb2Fmrrd,      // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
+  kThumb2Fmdrr,      // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
+  kThumb2Vabsd,      // vabs.f64 [111011101] D [110000] rd[15-12] [101111] M [0] vm[3-0].
+  kThumb2Vabss,      // vabs.f32 [111011101] D [110000] rd[15-12] [101011] M [0] vm[3-0].
+  kThumb2Vnegd,      // vneg.f64 [111011101] D [110001] rd[15-12] [101101] M [0] vm[3-0].
+  kThumb2Vnegs,      // vneg.f32 [111011101] D [110001] rd[15-12] [101001] M [0] vm[3-0].
+  kThumb2Vmovs_IMM8,  // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
+  kThumb2Vmovd_IMM8,  // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
+  kThumb2Mla,        // mla [111110110000] rn[19-16] ra[15-12] rd[11-8] [0000] rm[3-0].
+  kThumb2Umull,      // umull [111110111010] rn[19-16] rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
+  kThumb2Ldrex,      // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
+  kThumb2Ldrexd,     // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
+  kThumb2Strex,      // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0].
+  kThumb2Strexd,     // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0].
+  kThumb2Clrex,      // clrex [11110011101111111000111100101111].
+  kThumb2Bfi,        // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
+  kThumb2Bfc,        // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
+  kThumb2Dmb,        // dmb [1111001110111111100011110101] option[3-0].
+  kThumb2LdrPcReln12,  // ldr rd,[pc,#-imm12] [1111100001011111] rt[15-12] imm12[11-0].
+  kThumb2Stm,        // stm <list> [111010010000] rn[19-16] [000] rl[12-0].
+  kThumbUndefined,   // undefined [11011110xxxxxxxx].
+  kThumb2VPopCS,     // vpop <list of callee-save fp singles (s16+)>.
+  kThumb2VPushCS,    // vpush <list of callee-save fp singles (s16+)>.
+  kThumb2Vldms,      // vldms rd, <list>.
+  kThumb2Vstms,      // vstms rd, <list>.
+  kThumb2BUncond,    // b <label>.
+  kThumb2MovImm16H,  // similar to kThumb2MovImm16, but target high hw.
+  kThumb2AddPCR,     // Thumb2 2-operand add with hard-coded PC target.
+  kThumb2Adr,        // Special purpose encoding of ADR for switch tables.
+  kThumb2MovImm16LST,  // Special purpose version for switch table use.
+  kThumb2MovImm16HST,  // Special purpose version for switch table use.
+  kThumb2LdmiaWB,    // ldmia  [111010011001] rn[19..16] mask[15..0].
+  kThumb2OrrRRRs,    // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2Push1,      // t3 encoding of push.
+  kThumb2Pop1,       // t3 encoding of pop.
+  kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
+  kThumb2Smull,      // smull [111110111000] rn[19-16] rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
+  kThumb2LdrdPcRel8,  // ldrd rt, rt2, [pc, #+/-1024].
+  kThumb2LdrdI8,     // ldrd rt, rt2, [rn, #+/-1024].
+  kThumb2StrdI8,     // strd rt, rt2, [rn, #+/-1024].
+  kArmLast,
+};
+
+enum ArmOpDmbOptions {
+  kSY = 0xf,
+  kST = 0xe,
+  kISH = 0xb,
+  kISHST = 0xa,
+  kNSH = 0x7,
+  kNSHST = 0x6
+};
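+
+// For illustration: one of these values is ORed into bits [3-0] of the
+// kThumb2Dmb skeleton above, so a full-system barrier (kSY) assembles to
+// 0xf3bf8f5f, i.e. "dmb sy".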
+
+// Instruction assembly field_loc kind.
+enum ArmEncodingKind {
+  kFmtUnused,    // Unused field and marks end of formats.
+  kFmtBitBlt,    // Bit string using end/start.
+  kFmtDfp,       // Double FP reg.
+  kFmtSfp,       // Single FP reg.
+  kFmtModImm,    // Shifted 8-bit immed using [26,14..12,7..0].
+  kFmtImm16,     // Zero-extended immed using [26,19..16,14..12,7..0].
+  kFmtImm6,      // Encoded branch target using [9,7..3]:0.
+  kFmtImm12,     // Zero-extended immediate using [26,14..12,7..0].
+  kFmtShift,     // Shift descriptor, [14..12,7..4].
+  kFmtLsb,       // Least significant bit using [14..12][7..6].
+  kFmtBWidth,    // Bit-field width, encoded as width-1.
+  kFmtShift5,    // Shift count, [14..12,7..6].
+  kFmtBrOffset,  // Sign-extended [26,11,13,21-16,10-0]:0.
+  kFmtFPImm,     // Encoded floating point immediate.
+  kFmtOff24,     // 24-bit Thumb2 unconditional branch encoding.
+  kFmtSkip,      // Unused field, but continue to next.
+};
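+
+// A minimal sketch (assumed helper, not part of this change) of how a
+// kFmtBitBlt descriptor is applied when assembling an instruction: the
+// operand value is masked to the field width and ORed into the skeleton.
+//
+//   uint32_t InsertBitBlt(uint32_t skeleton, int end, int start, uint32_t value) {
+//     uint32_t width = end - start + 1;    // field width in bits
+//     uint32_t mask = (1u << width) - 1u;  // e.g. 0xf for a 4-bit field
+//     return skeleton | ((value & mask) << start);
+//   }
+//
+// e.g. {kFmtBitBlt, 11, 8} places a 4-bit register number at bits [11..8].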
+
+// Struct used to define the snippet positions for each Thumb opcode.
+struct ArmEncodingMap {
+  uint32_t skeleton;
+  struct {
+    ArmEncodingKind kind;
+    int end;   // end for kFmtBitBlt, 1-bit slice position for FP regs.
+    int start;  // start for kFmtBitBlt, 4-bit slice position for FP regs.
+  } field_loc[4];
+  ArmOpcode opcode;
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  int size;   // Note: size is in bytes.
+  FixupKind fixup;
+};
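+
+// As an example of how the table in assemble_arm64.cc fills this struct, the
+// kThumbAddRRI3 entry carries:
+//   skeleton  = 0x1c00
+//   field_loc = {{kFmtBitBlt, 2, 0}, {kFmtBitBlt, 5, 3}, {kFmtBitBlt, 8, 6},
+//                {kFmtUnused, -1, -1}}
+//   opcode    = kThumbAddRRI3
+//   flags     = IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES
+//   name      = "adds", fmt = "!0C, !1C, #!2d", size = 2, fixup = kFixupNone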
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
new file mode 100644
index 0000000..e79ebad
--- /dev/null
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -0,0 +1,1682 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+/*
+ * opcode: ArmOpcode enum
+ * skeleton: pre-designated bit-pattern for this opcode
+ * k0: key to applying ds/de
+ * ds: dest start bit position
+ * de: dest end bit position
+ * k1: key to applying s1s/s1e
+ * s1s: src1 start bit position
+ * s1e: src1 end bit position
+ * k2: key to applying s2s/s2e
+ * s2s: src2 start bit position
+ * s2e: src2 end bit position
+ * k3: key to applying k3s/k3e
+ * k3s: extra operand start bit position
+ * k3e: extra operand end bit position
+ * flags: instruction attributes (the IS_*_OP flags encode the operand count)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ * size: instruction size in bytes
+ * fixup: kind of fixup to apply at assembly time
+ */
+#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
+                     k3, k3s, k3e, flags, name, fmt, size, fixup) \
+        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
+                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ *     0 -> operands[0] (dest)
+ *     1 -> operands[1] (src1)
+ *     2 -> operands[2] (src2)
+ *     3 -> operands[3] (extra)
+ *
+ * [f]ormats:
+ *     h -> 4-digit hex
+ *     d -> decimal
+ *     E -> decimal*4
+ *     F -> decimal*2
+ *     c -> branch condition (beq, bne, etc.)
+ *     t -> pc-relative target
+ *     u -> 1st half of bl[x] target
+ *     v -> 2nd half of bl[x] target
+ *     R -> register list
+ *     s -> single precision floating point register
+ *     S -> double precision floating point register
+ *     m -> Thumb2 modified immediate
+ *     n -> complemented Thumb2 modified immediate
+ *     M -> Thumb2 16-bit zero-extended immediate
+ *     b -> 4-digit binary
+ *     B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
+ *     H -> operand shift
+ *     C -> core register name
+ *     P -> fp cs register list (base of s16)
+ *     Q -> fp cs register list (base of s0)
+ *
+ *  [!] escape.  To insert "!", use "!!"
+ */
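+/* Worked example (operand values here are hypothetical): with fmt
+ * "!0C, [!1C, #!2E]" and operands {0, 5, 3}, "!0C" prints core register r0,
+ * "!1C" prints r5, and "!2E" prints 3*4 = 12, giving "ldr r0, [r5, #12]".
+ */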
+/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */
+const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kArmLast] = {
+    ENCODING_MAP(kArm16BitData,    0x0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
+    ENCODING_MAP(kThumbAdcRR,        0x4140,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
+                 "adcs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRRI3,      0x1c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "adds", "!0C, !1C, #!2d", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRI8,       0x3000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
+                 "adds", "!0C, !0C, #!1d", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRRR,       0x1800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adds", "!0C, !1C, !2C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRRLH,     0x4440,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRRHL,     0x4480,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddRRHH,     0x44c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddPcRel,    0xa000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
+                 "add", "!0C, pc, #!1E", 2, kFixupLoad),
+    ENCODING_MAP(kThumbAddSpRel,    0xa800,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "add", "!0C, sp, #!2E", 2, kFixupNone),
+    ENCODING_MAP(kThumbAddSpI7,      0xb000,
+                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "add", "sp, #!0d*4", 2, kFixupNone),
+    ENCODING_MAP(kThumbAndRR,        0x4000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "ands", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbAsrRRI5,      0x1000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
+    ENCODING_MAP(kThumbAsrRR,        0x4100,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "asrs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbBCond,        0xd000,
+                 kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
+                 NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
+    ENCODING_MAP(kThumbBUncond,      0xe000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
+                 "b", "!0t", 2, kFixupT1Branch),
+    ENCODING_MAP(kThumbBicRR,        0x4380,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "bics", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbBkpt,          0xbe00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "bkpt", "!0d", 2, kFixupNone),
+    ENCODING_MAP(kThumbBlx1,         0xf000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
+                 NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
+    ENCODING_MAP(kThumbBlx2,         0xe800,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
+                 NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
+    ENCODING_MAP(kThumbBl1,          0xf000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_1", "!0u", 2, kFixupBl1),
+    ENCODING_MAP(kThumbBl2,          0xf800,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_2", "!0v", 2, kFixupLabel),
+    ENCODING_MAP(kThumbBlxR,         0x4780,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
+                 "blx", "!0C", 2, kFixupNone),
+    ENCODING_MAP(kThumbBx,            0x4700,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "bx", "!0C", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmnRR,        0x42c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmpRI8,       0x2800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0C, #!1d", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmpRR,        0x4280,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmpLH,        0x4540,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmpHL,        0x4580,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbCmpHH,        0x45c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbEorRR,        0x4040,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "eors", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdmia,         0xc800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrRRI5,      0x6800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrRRR,       0x5800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrPcRel,    0x4800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
+                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
+    ENCODING_MAP(kThumbLdrSpRel,    0x9800,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
+                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbLslRRI5,      0x0000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
+    ENCODING_MAP(kThumbLslRR,        0x4080,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "lsls", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbLsrRRI5,      0x0800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
+    ENCODING_MAP(kThumbLsrRR,        0x40c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "lsrs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMovImm,       0x2000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
+                 "movs", "!0C, #!1d", 2, kFixupNone),
+    ENCODING_MAP(kThumbMovRR,        0x1c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "movs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMul,           0x4340,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "muls", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbMvn,           0x43c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "mvns", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbNeg,           0x4240,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "negs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbOrr,           0x4300,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "orrs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbPop,           0xbc00,
+                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
+                 | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
+    ENCODING_MAP(kThumbPush,          0xb400,
+                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
+                 | IS_STORE, "push", "<!0R>", 2, kFixupNone),
+    ENCODING_MAP(kThumbRev,           0xba00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbRevsh,         0xbac0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbRorRR,        0x41c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "rors", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbSbc,           0x4180,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
+                 "sbcs", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbStmia,         0xc000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stmia", "!0C!!, <!1R>", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrRRI5,      0x6000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrRRR,       0x5000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "str", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrSpRel,    0x9000,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
+                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrbRRI5,     0x7000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrbRRR,      0x5400,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrhRRI5,     0x8000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
+    ENCODING_MAP(kThumbStrhRRR,      0x5200,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
+    ENCODING_MAP(kThumbSubRRI3,      0x1e00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0C, !1C, #!2d", 2, kFixupNone),
+    ENCODING_MAP(kThumbSubRI8,       0x3800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
+                 "subs", "!0C, #!1d", 2, kFixupNone),
+    ENCODING_MAP(kThumbSubRRR,       0x1a00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "subs", "!0C, !1C, !2C", 2, kFixupNone),
+    ENCODING_MAP(kThumbSubSpI7,      0xb080,
+                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "sub", "sp, #!0d*4", 2, kFixupNone),
+    ENCODING_MAP(kThumbSwi,           0xdf00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "swi", "!0d", 2, kFixupNone),
+    ENCODING_MAP(kThumbTst,           0x4200,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0C, !1C", 2, kFixupNone),
+    /*
+     * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though
+     * these instructions don't define lr.  The reason is that these instructions
+     * are used for loading values from the literal pool, and the displacement may be found
+     * to be insufficient at assembly time.  In that case, we need to materialize a new base
+     * register - and will use lr as the temp register.  This works because lr is used as
+     * a temp register in very limited situations, and never in conjunction with a floating
+     * point constant load.  However, it is possible that during instruction scheduling,
+     * another use of lr could be moved across a vldrd/vldrs.  By setting REG_DEF_LR, we
+     * prevent that from happening.  Note that we set REG_DEF_LR on all vldrd/vldrs - even those
+     * not used in a pc-relative case.  It is really only needed on the pc-relative loads, but
+     * the case we're handling is rare enough that it seemed not worth the trouble to distinguish.
+     */
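+    /*
+     * An illustrative sketch of that rebase (register choice and offset split
+     * here are hypothetical; the real sequence is decided at assembly time):
+     *
+     *   vldr  s0, [pc, #disp]   @ disp no longer fits the scaled imm8
+     *
+     * is rewritten along the lines of
+     *
+     *   add   lr, pc, #(disp & ~0x3ff)
+     *   vldr  s0, [lr, #(disp & 0x3ff)]
+     */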
+    ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
+                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
+    ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
+                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
+    ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
+                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
+                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vsub", "!0s, !1s, !2s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vsub", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vadds,        0xee300a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vadd", "!0s, !1s, !2s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vadd", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VmlaF64,     0xee000b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
+                 "vmla", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
+                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
+                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtF64S32,   0xeeb80bc0,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtF64U32,   0xeeb80b40,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mov", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mov", "!0C, #!1M", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
+                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
+                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
+    ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
+                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
+                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
+    ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
+                 "add", "!0C,!1C,#!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Stmia,         0xe8800000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stmia", "!0C!!, <!1R>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
+                 "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
+                 "sub", "!0C,!1C,#!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mvn", "!0C, #!1n", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "sel", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
+                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
+                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
+                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
+    ENCODING_MAP(kThumb2Push,          0xe92d0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
+                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
+    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AndRRR,  0xea000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2BicRRR,  0xea200000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmnRR,  0xeb100f00,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2EorRRR,  0xea800000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SdivRRR,  0xfb90f0f0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2UdivRRR,  0xfbb0f0f0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "udiv", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "neg", "!0C,!1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2TstRR,       0xea100f00,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsl", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsr", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "asr", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "ror", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "asr", "!0C, !1C, #!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "ror", "!0C, !1C, #!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "bic", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "and", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orr", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "eor", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "adds", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
+                 "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
+                 "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RevRR, 0xfa90f080,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
+                 "rev", "!0C, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
+                 "revsh", "!0C, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2It,  0xbf00,
+                 kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
+                 "it:!1b", "!0c", 2, kFixupNone),
+    ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
+                 "fmstat", "", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
+                 "vcmp.f64", "!0S, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
+                 "vcmp.f32", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
+    ENCODING_MAP(kThumb2BCond,        0xf0008000,
+                 kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
+                 "b!1c", "!0t", 4, kFixupCondBranch),
+    ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
+                 kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmrs", "!0C, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
+                 kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmsr", "!0s, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
+                 "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
+                 kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vabs.f64", "!0S, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vabs.f32", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vneg.f64", "!0S, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vneg.f32", "!0s, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
+                 kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
+                 kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Mla,  0xfb000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
+                 "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Umull,  0xfba00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 3, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
+                 "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Ldrexd,      0xe8d0007f,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD,
+                 "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Strex,       0xe8400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
+                 "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Strexd,      0xe8c00070,
+                 kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
+                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE,
+                 "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND,
+                 "clrex", "", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Bfi,         0xf3600000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
+                 kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
+                 kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
+                 kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 "dmb", "#!0B", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Stm,          0xe9000000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stm", "!0C, <!1R>", 4, kFixupNone),
+    ENCODING_MAP(kThumbUndefined,       0xde00,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND,
+                 "undefined", "", 2, kFixupNone),
+    // NOTE: vpop, vpush hard-encoded for s16+ reg list
+    ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
+                 | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
+                 | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vldms,        0xec900a00,
+                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
+                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Vstms,        0xec800a00,
+                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
+                 | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2BUncond,      0xf0009000,
+                 kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 "b", "!0t", 4, kFixupT2Branch),
+    ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
+                 "movt", "!0C, #!1M", 4, kFixupNone),
+    ENCODING_MAP(kThumb2AddPCR,      0x4487,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "add", "rPC, !0C", 2, kFixupLabel),
+    ENCODING_MAP(kThumb2Adr,         0xf20f0000,
+                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 /* Note: doesn't affect flags */
+                 IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
+                 "adr", "!0C,#!1d", 4, kFixupAdr),
+    ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
+                 "mov", "!0C, #!1M", 4, kFixupMovImmLST),
+    ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
+                 "movt", "!0C, #!1M", 4, kFixupMovImmHST),
+    ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
+    ENCODING_MAP(kThumb2OrrRRRs,  0xea500000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Push1,    0xf84d0d04,
+                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
+                 | IS_STORE, "push1", "!0C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Pop1,    0xf85d0b04,
+                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
+                 | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
+    ENCODING_MAP(kThumb2Smull,  0xfb800000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 3, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
+                 "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
+    ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
+                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
+                 "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+};
+
+// new_lir replaces orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+  orig_lir->flags.fixup = kFixupNone;
+}
+
+// new_lir is inserted before orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+}
+
+/*
+ * A fake NOP of moving r0 to r0 can incur data stalls if r0 is not
+ * ready. Since r5FP is updated infrequently, a self-move of r5 is less
+ * likely to generate unnecessary stall cycles.
+ * TUNING: No longer true - find new NOP pattern.
+ */
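+// Illustrative decode: 0x1C2D appears to be Thumb ADD (immediate, T1),
+// i.e. adds r5, r5, #0 - effectively a move of r5 to itself.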
+#define PADDING_MOV_R5_R5               0x1C2D
+
+uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+  for (; lir != NULL; lir = NEXT_LIR(lir)) {
+    if (!lir->flags.is_nop) {
+      int opcode = lir->opcode;
+      if (IsPseudoLirOp(opcode)) {
+        if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
+          // Note: size for this opcode will be either 0 or 2 depending on final alignment.
+          if (lir->offset & 0x2) {
+            write_pos[0] = (PADDING_MOV_R5_R5 & 0xff);
+            write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
+            write_pos += 2;
+          }
+        }
+      } else if (LIKELY(!lir->flags.is_nop)) {
+        const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
+        uint32_t bits = encoder->skeleton;
+        for (int i = 0; i < 4; i++) {
+          uint32_t operand;
+          uint32_t value;
+          operand = lir->operands[i];
+          ArmEncodingKind kind = encoder->field_loc[i].kind;
+          if (LIKELY(kind == kFmtBitBlt)) {
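+            // Common case: shift the operand to the field's start bit and
+            // mask off anything above the field's end bit.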
+            value = (operand << encoder->field_loc[i].start) &
+                ((1 << (encoder->field_loc[i].end + 1)) - 1);
+            bits |= value;
+          } else {
+            switch (encoder->field_loc[i].kind) {
+              case kFmtSkip:
+                break;  // Nothing to do, but continue to next.
+              case kFmtUnused:
+                i = 4;  // Done, break out of the enclosing loop.
+                break;
+              case kFmtFPImm:
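+                // VFP immediate: the high nibble lands at the field's end
+                // position (imm4H), the low nibble at its start (imm4L).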
+                value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
+                value |= (operand & 0x0F) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              case kFmtBrOffset:
+                value = ((operand  & 0x80000) >> 19) << 26;
+                value |= ((operand & 0x40000) >> 18) << 11;
+                value |= ((operand & 0x20000) >> 17) << 13;
+                value |= ((operand & 0x1f800) >> 11) << 16;
+                value |= (operand  & 0x007ff);
+                bits |= value;
+                break;
+              case kFmtShift5:
+                value = ((operand & 0x1c) >> 2) << 12;
+                value |= (operand & 0x03) << 6;
+                bits |= value;
+                break;
+              case kFmtShift:
+                value = ((operand & 0x70) >> 4) << 12;
+                value |= (operand & 0x0f) << 4;
+                bits |= value;
+                break;
+              case kFmtBWidth:
+                value = operand - 1;
+                bits |= value;
+                break;
+              case kFmtLsb:
+                value = ((operand & 0x1c) >> 2) << 12;
+                value |= (operand & 0x03) << 6;
+                bits |= value;
+                break;
+              case kFmtImm6:
+                value = ((operand & 0x20) >> 5) << 9;
+                value |= (operand & 0x1f) << 3;
+                bits |= value;
+                break;
+              case kFmtDfp: {
+                DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
+                uint32_t reg_num = RegStorage::RegNum(operand);
+                /* Snag the 1-bit slice and position it */
+                value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end;
+                /* Extract and position the 4-bit slice */
+                value |= (reg_num & 0x0f) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              }
+              case kFmtSfp: {
+                DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
+                uint32_t reg_num = RegStorage::RegNum(operand);
+                /* Snag the 1-bit slice and position it */
+                value = (reg_num & 0x1) << encoder->field_loc[i].end;
+                /* Extract and position the 4-bit slice */
+                value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              }
+              case kFmtImm12:
+              case kFmtModImm:
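+                // Scatter the 12-bit value as i:imm3:imm8 into bits 26,
+                // 14:12 and 7:0.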
+                value = ((operand & 0x800) >> 11) << 26;
+                value |= ((operand & 0x700) >> 8) << 12;
+                value |= operand & 0x0ff;
+                bits |= value;
+                break;
+              case kFmtImm16:
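+                // movw/movt-style immediate: scatter imm4:i:imm3:imm8 into
+                // bits 19:16, 26, 14:12 and 7:0.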
+                value = ((operand & 0x0800) >> 11) << 26;
+                value |= ((operand & 0xf000) >> 12) << 16;
+                value |= ((operand & 0x0700) >> 8) << 12;
+                value |= operand & 0x0ff;
+                bits |= value;
+                break;
+              case kFmtOff24: {
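+                // T4 branch offset: S goes to bit 26, J1/J2 (I1/I2 ^ S,
+                // inverted) to bits 13/11, imm10 to bits 25:16 and imm11 to
+                // bits 10:0.  E.g. operand 0x800 gives imm10 = 1, imm11 = 0,
+                // j1 = j2 = 1: value = (1 << 16) | (1 << 13) | (1 << 11).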
+                uint32_t signbit = (operand >> 31) & 0x1;
+                uint32_t i1 = (operand >> 22) & 0x1;
+                uint32_t i2 = (operand >> 21) & 0x1;
+                uint32_t imm10 = (operand >> 11) & 0x03ff;
+                uint32_t imm11 = operand & 0x07ff;
+                uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+                uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+                value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+                    imm11;
+                bits |= value;
+                }
+                break;
+              default:
+                LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+            }
+          }
+        }
+        if (encoder->size == 4) {
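+          // A 32-bit Thumb2 instruction is emitted as two little-endian
+          // 16-bit halfwords, most-significant halfword first.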
+          write_pos[0] = ((bits >> 16) & 0xff);
+          write_pos[1] = ((bits >> 24) & 0xff);
+          write_pos[2] = (bits & 0xff);
+          write_pos[3] = ((bits >> 8) & 0xff);
+          write_pos += 4;
+        } else {
+          DCHECK_EQ(encoder->size, 2);
+          write_pos[0] = (bits & 0xff);
+          write_pos[1] = ((bits >> 8) & 0xff);
+          write_pos += 2;
+        }
+      }
+    }
+  }
+  return write_pos;
+}
+
+// Assemble the LIR into binary instruction format.
+void Arm64Mir2Lir::AssembleLIR() {
+  LIR* lir;
+  LIR* prev_lir;
+  cu_->NewTimingSplit("Assemble");
+  int assembler_retries = 0;
+  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
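+  // Round the start of the data section up to a 4-byte boundary.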
+  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  int32_t offset_adjustment;
+  AssignDataOffsets();
+
+  /*
+   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for
+   * non-visited nodes).  Start at zero here, and the bit will be flipped to 1 on entry to the loop.
+   */
+  int generation = 0;
+  while (true) {
+    offset_adjustment = 0;
+    AssemblerStatus res = kSuccess;  // Assume success
+    generation ^= 1;
+    // Note: nodes requiring possible fixup are linked in ascending order.
+    lir = first_fixup_;
+    prev_lir = NULL;
+    while (lir != NULL) {
+      /*
+       * NOTE: the lir being considered here will be encoded following the switch (so long as
+       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
+       * due to retry must be explicitly encoded at the time of insertion.  Note that
+       * inserted instructions don't need use/def flags, but do need size and pc-rel status
+       * properly updated.
+       */
+      lir->offset += offset_adjustment;
+      // During the pass, this tells us whether a node's offset has already been adjusted.
+      lir->flags.generation = generation;
+      switch (static_cast<FixupKind>(lir->flags.fixup)) {
+        case kFixupLabel:
+        case kFixupNone:
+          break;
+        case kFixupVLoad:
+          if (lir->operands[1] != rs_r15pc.GetReg()) {
+            break;
+          }
+          // NOTE: intentional fallthrough.
+        case kFixupLoad: {
+          /*
+           * PC-relative loads are mostly used to load immediates
+           * that are too large to materialize directly in one shot.
+           * However, if the load displacement exceeds the limit,
+           * we revert to a multiple-instruction materialization sequence.
+           */
+          LIR *lir_target = lir->target;
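+          // Thumb PC-relative loads are based on Align(PC, 4), where PC
+          // reads as the address of this instruction plus 4.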
+          CodeOffset pc = (lir->offset + 4) & ~3;
+          CodeOffset target = lir_target->offset +
+              ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (res != kSuccess) {
+            /*
+             * In this case, we're just estimating and will do it again for real.  Ensure offset
+             * is legal.
+             */
+            delta &= ~0x3;
+          }
+          DCHECK_EQ((delta & 0x3), 0);
+          // First, a sanity check for cases we shouldn't see now
+          if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
+              ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
+            // Shouldn't happen in current codegen.
+            LOG(FATAL) << "Unexpected pc-rel offset " << delta;
+          }
+          // Now, check for the difficult cases
+          if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+              ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+            /*
+             * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
+             * sometimes have to use it to fix up out-of-range accesses.  This is where that
+             * happens.
+             */
+            int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
+                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] :
+                            rs_rARM_LR.GetReg();
+
+            // Add new Adr to generate the address.
+            LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
+                       base_reg, 0, 0, 0, 0, lir->target);
+            new_adr->offset = lir->offset;
+            new_adr->flags.fixup = kFixupAdr;
+            new_adr->flags.size = EncodingMap[kThumb2Adr].size;
+            InsertLIRBefore(lir, new_adr);
+            lir->offset += new_adr->flags.size;
+            offset_adjustment += new_adr->flags.size;
+
+            // lir no longer pcrel, unlink and link in new_adr.
+            ReplaceFixup(prev_lir, lir, new_adr);
+
+            // Convert to normal load.
+            offset_adjustment -= lir->flags.size;
+            if (lir->opcode == kThumb2LdrPcRel12) {
+              lir->opcode = kThumb2LdrRRI12;
+            } else if (lir->opcode == kThumb2LdrdPcRel8) {
+              lir->opcode = kThumb2LdrdI8;
+            }
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Change the load to be relative to the new Adr base.
+            if (lir->opcode == kThumb2LdrdI8) {
+              lir->operands[3] = 0;
+              lir->operands[2] = base_reg;
+            } else {
+              lir->operands[2] = 0;
+              lir->operands[1] = base_reg;
+            }
+            prev_lir = new_adr;  // Continue scan with new_adr;
+            lir = new_adr->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            if ((lir->opcode == kThumb2Vldrs) ||
+                (lir->opcode == kThumb2Vldrd) ||
+                (lir->opcode == kThumb2LdrdPcRel8)) {
+              lir->operands[2] = delta >> 2;
+            } else {
+              lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
+                  delta >> 2;
+            }
+          }
+          break;
+        }
+        case kFixupCBxZ: {
+          LIR *target_lir = lir->target;
+          CodeOffset pc = lir->offset + 4;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (delta > 126 || delta < 0) {
+            /*
+             * Convert to cmp rx,#0 / b[eq/ne] tgt pair
+             * Make new branch instruction and insert after
+             */
+            LIR* new_inst =
+              RawLIR(lir->dalvik_offset, kThumbBCond, 0,
+                     (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
+                     0, 0, 0, lir->target);
+            InsertLIRAfter(lir, new_inst);
+
+            /* Convert the cb[n]z to a cmp rx, #0 */
+            // Subtract the old size.
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumbCmpRI8;
+            /* operand[0] is src1 in both cb[n]z & CmpRI8 */
+            lir->operands[1] = 0;
+            lir->target = 0;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            // Add back the new size.
+            offset_adjustment += lir->flags.size;
+            // Set up the new following inst.
+            new_inst->offset = lir->offset + lir->flags.size;
+            new_inst->flags.fixup = kFixupCondBranch;
+            new_inst->flags.size = EncodingMap[new_inst->opcode].size;
+            offset_adjustment += new_inst->flags.size;
+
+            // lir no longer pcrel, unlink and link in new_inst.
+            ReplaceFixup(prev_lir, lir, new_inst);
+            prev_lir = new_inst;  // Continue with the new instruction.
+            lir = new_inst->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            lir->operands[1] = delta >> 1;
+          }
+          break;
+        }
+        case kFixupPushPop: {
+          if (__builtin_popcount(lir->operands[0]) == 1) {
+            /*
+             * The standard push/pop multiple instruction
+             * requires at least two registers in the list.
+             * If we've got just one, switch to the single-reg
+             * encoding.
+             */
+            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
+                kThumb2Pop1;
+            int reg = 0;
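+            // Locate the single remaining set bit; that register number
+            // becomes the operand of the single-reg encoding.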
+            while (lir->operands[0]) {
+              if (lir->operands[0] & 0x1) {
+                break;
+              } else {
+                reg++;
+                lir->operands[0] >>= 1;
+              }
+            }
+            lir->operands[0] = reg;
+            // This won't change again; don't bother unlinking, just reset the fixup kind.
+            lir->flags.fixup = kFixupNone;
+          }
+          break;
+        }
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          int32_t delta = 0;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset + 4;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          delta = target - pc;
+          if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BCond;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            // Fixup kind remains the same.
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          }
+          lir->operands[0] = delta >> 1;
+          break;
+        }
+        case kFixupT2Branch: {
+          LIR *target_lir = lir->target;
+          CodeOffset pc = lir->offset + 4;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          lir->operands[0] = delta >> 1;
+          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
+            // Useless branch
+            offset_adjustment -= lir->flags.size;
+            lir->flags.is_nop = true;
+            // Don't unlink - just set to do-nothing.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupT1Branch: {
+          LIR *target_lir = lir->target;
+          CodeOffset pc = lir->offset + 4;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (delta > 2046 || delta < -2048) {
+            // Out of range for a 16-bit branch: convert to kThumb2BUncond.
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BUncond;
+            lir->operands[0] = 0;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            lir->flags.fixup = kFixupT2Branch;
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          } else {
+            lir->operands[0] = delta >> 1;
+            if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
+              // Useless branch
+              offset_adjustment -= lir->flags.size;
+              lir->flags.is_nop = true;
+              // Don't unlink - just set to do-nothing.
+              lir->flags.fixup = kFixupNone;
+              res = kRetryAll;
+            }
+          }
+          break;
+        }
+        case kFixupBlx1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
+          /* cur_pc is Thumb */
+          CodeOffset cur_pc = (lir->offset + 4) & ~3;
+          CodeOffset target = lir->operands[1];
+
+          /* Match bit[1] in target with base */
+          if (cur_pc & 0x2) {
+            target |= 0x2;
+          }
+          int32_t delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupBl1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
+          /* Both cur_pc and target are Thumb */
+          CodeOffset cur_pc = lir->offset + 4;
+          CodeOffset target = lir->operands[1];
+
+          int32_t delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupAdr: {
+          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+          LIR* target = lir->target;
+          int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
+              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
+              offset_adjustment);
+          int32_t disp = target_disp - ((lir->offset + 4) & ~3);
+          if (disp < 4096) {
+            lir->operands[1] = disp;
+          } else {
+            // Convert to a movimm16 low/high pair that loads the displacement, then add tgt, pc.
+            // TUNING: if this case fires often, it can be improved.  Not expected to be common.
+            LIR *new_mov16L =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0,
+                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
+            new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
+            new_mov16L->flags.fixup = kFixupMovImmLST;
+            new_mov16L->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16L);
+            lir->offset += new_mov16L->flags.size;
+            offset_adjustment += new_mov16L->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16L);
+            prev_lir = new_mov16L;   // Now we've got a new prev.
+            LIR *new_mov16H =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0,
+                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
+            new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
+            new_mov16H->flags.fixup = kFixupMovImmHST;
+            new_mov16H->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16H);
+            lir->offset += new_mov16H->flags.size;
+            offset_adjustment += new_mov16H->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16H);
+            prev_lir = new_mov16H;  // Now we've got a new prev.
+
+            offset_adjustment -= lir->flags.size;
+            if (RegStorage::RegNum(lir->operands[0]) < 8) {
+              lir->opcode = kThumbAddRRLH;
+            } else {
+              lir->opcode = kThumbAddRRHH;
+            }
+            lir->operands[1] = rs_rARM_PC.GetReg();
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Must stay in fixup list and have offset updated; will be used by LST/HST pair.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupMovImmLST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
+          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
+          break;
+        }
+        case kFixupMovImmHST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
+          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] =
+              ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
+          break;
+        }
+        case kFixupAlign4: {
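+          // The align-4 pseudo op occupies 2 bytes only when the current
+          // offset is not already 4-byte aligned.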
+          int32_t required_size = lir->offset & 0x2;
+          if (lir->flags.size != required_size) {
+            offset_adjustment += required_size - lir->flags.size;
+            lir->flags.size = required_size;
+            res = kRetryAll;
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
+      }
+      prev_lir = lir;
+      lir = lir->u.a.pcrel_next;
+    }
+
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      starting_offset += offset_adjustment;
+      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      AssignDataOffsets();
+    }
+  }
+
+  // Build the CodeBuffer.
+  DCHECK_LE(data_offset_, total_size_);
+  code_buffer_.reserve(total_size_);
+  code_buffer_.resize(starting_offset);
+  uint8_t* write_pos = &code_buffer_[0];
+  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
+  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
+
+  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  cu_->NewTimingSplit("PcMappingTable");
+  CreateMappingTables();
+
+  cu_->NewTimingSplit("GcMap");
+  CreateNativeGcMap();
+}
+
+int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
+  DCHECK(!IsPseudoLirOp(lir->opcode));
+  return EncodingMap[lir->opcode].size;
+}
+
+// Assign offsets and sizes, set fixup kinds, and link instructions needing fixup into the pcrel chain.
+uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+  LIR* end_lir = tail_lir->next;
+
+  LIR* last_fixup = NULL;
+  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    if (!lir->flags.is_nop) {
+      if (lir->flags.fixup != kFixupNone) {
+        if (!IsPseudoLirOp(lir->opcode)) {
+          lir->flags.size = EncodingMap[lir->opcode].size;
+          lir->flags.fixup = EncodingMap[lir->opcode].fixup;
+        } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+          lir->flags.size = (offset & 0x2);
+          lir->flags.fixup = kFixupAlign4;
+        } else {
+          lir->flags.size = 0;
+          lir->flags.fixup = kFixupLabel;
+        }
+        // Link into the fixup chain.
+        lir->flags.use_def_invalid = true;
+        lir->u.a.pcrel_next = NULL;
+        if (first_fixup_ == NULL) {
+          first_fixup_ = lir;
+        } else {
+          last_fixup->u.a.pcrel_next = lir;
+        }
+        last_fixup = lir;
+        lir->offset = offset;
+      }
+      offset += lir->flags.size;
+    }
+  }
+  return offset;
+}
+
+void Arm64Mir2Lir::AssignDataOffsets() {
+  /* Set up offsets for literals */
+  CodeOffset offset = data_offset_;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  total_size_ = AssignFillArrayDataOffset(offset);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
new file mode 100644
index 0000000..9dfee6e
--- /dev/null
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -0,0 +1,477 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Thumb2 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+
+namespace art {
+
+/*
+ * The sparse table in the literal pool is an array of <key,displacement>
+ * pairs.  For each set, we'll load them as a pair using ldmia.
+ * This means that the register number of the temp we use for the key
+ * must be lower than the reg for the displacement.
+ *
+ * The test loop will look something like:
+ *
+ *   adr   r_base, <table>
+ *   ldr   r_val, [rARM_SP, v_reg_off]
+ *   mov   r_idx, #table_size
+ * lp:
+ *   ldmia r_base!, {r_key, r_disp}
+ *   sub   r_idx, #1
+ *   cmp   r_val, r_key
+ *   ifeq
+ *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
+ *   cbnz  r_idx, lp
+ */
+void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
+                                 RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpSparseSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage r_base = AllocTemp();
+  /* Allocate key and disp temps */
+  RegStorage r_key = AllocTemp();
+  RegStorage r_disp = AllocTemp();
+  // Make sure r_key's register number is less than r_disp's number for ldmia
+  if (r_key.GetReg() > r_disp.GetReg()) {
+    RegStorage tmp = r_disp;
+    r_disp = r_key;
+    r_key = tmp;
+  }
+  // Materialize a pointer to the switch table
+  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
+  // Set up r_idx
+  RegStorage r_idx = AllocTemp();
+  LoadConstant(r_idx, size);
+  // Establish loop branch target
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  // Load next key/disp
+  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum()));
+  OpRegReg(kOpCmp, r_key, rl_src.reg);
+  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
+  LIR* it = OpIT(kCondEq, "");
+  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
+  OpEndIT(it);
+  tab_rec->anchor = switch_branch;
+  // Needs to use setflags encoding here
+  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  OpCondBranch(kCondNe, target);
+}
+
+void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
+                                 RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpPackedSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets =
+      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage table_base = AllocTemp();
+  // Materialize a pointer to the switch table
+  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
+  int low_key = s4FromSwitchData(&table[2]);
+  RegStorage keyReg;
+  // Remove the bias, if necessary
+  if (low_key == 0) {
+    keyReg = rl_src.reg;
+  } else {
+    keyReg = AllocTemp();
+    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
+  }
+  // Bounds check - if < 0 or >= size, continue with the code following the switch.
+  OpRegImm(kOpCmp, keyReg, size-1);
+  LIR* branch_over = OpCondBranch(kCondHi, NULL);
+
+  // Load the displacement from the switch table
+  RegStorage disp_reg = AllocTemp();
+  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
+
+  // ...and go! NOTE: No instruction set switch here - must stay Thumb2
+  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
+  tab_rec->anchor = switch_branch;
+
+  /* branch_over target here */
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+}
+
+/*
+ * Array data table format:
+ *  ushort ident = 0x0300   magic value
+ *  ushort width            width of each element in the table
+ *  uint   size             number of elements in the table
+ *  ubyte  data[size*width] table of data values (may contain a single-byte
+ *                          padding at the end)
+ *
+ * Total size is 4+(width * size + 1)/2 16-bit code units.
+ */
+void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  // Add the table to the list - we'll process it later
+  FillArrayData *tab_rec =
+      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint16_t width = tab_rec->table[1];
+  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
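+  // The extra 8 bytes cover the table header (ident, width and size).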
+  tab_rec->size = (size * width) + 8;
+
+  fill_array_data_.Insert(tab_rec);
+
+  // Making a call - use explicit registers
+  FlushAllRegs();   /* Everything to home location */
+  LoadValueDirectFixed(rl_src, rs_r0);
+  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
+               rs_rARM_LR);
+  // Materialize a pointer to the fill data image
+  NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec));
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+  MarkSafepointPC(call_inst);
+}
+
+/*
+ * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc.
+ */
+void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  // FIXME: need separate LoadValues for object references.
+  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
+  LockCallTemps();  // Prepare for explicit register usage
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    LIR* null_check_branch = nullptr;
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
+    }
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
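+    // ldrex/strex scale their 8-bit offset by 4, hence the >> 2 on the
+    // monitor field offset.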
+    MarkPossibleNullPointerException(opt_flags);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
+    NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    not_unlocked_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artLockObjectFromCode(obj);
+    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    lock_success_branch->target = success_target;
+    GenMemBarrier(kLoadLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rs_r0, opt_flags);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
+    OpRegImm(kOpCmp, rs_r1, 0);
+    LIR* it = OpIT(kCondEq, "");
+    NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpEndIT(it);
+    OpRegImm(kOpCmp, rs_r1, 0);
+    it = OpIT(kCondNe, "T");
+    // Go expensive route - artLockObjectFromCode(self, obj);
+    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(),
+                       rs_rARM_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
+    OpEndIT(it);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kLoadLoad);
+  }
+}
+
+/*
+ * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
+ * and can only give away ownership if it is suspended.
+ */
+void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
+  LockCallTemps();  // Prepare for explicit register usage
+  LIR* null_check_branch = nullptr;
+  Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
+    }
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    MarkPossibleNullPointerException(opt_flags);
+    LoadConstantNoClobber(rs_r3, 0);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
+    Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    slow_unlock_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artUnlockObjectFromCode(obj);
+    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    unlock_success_branch->target = success_target;
+    GenMemBarrier(kStoreLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rs_r0, opt_flags);
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    MarkPossibleNullPointerException(opt_flags);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    LoadConstantNoClobber(rs_r3, 0);
+    // Is lock unheld on lock or held by us (==thread_id) on unlock?
+    OpRegReg(kOpCmp, rs_r1, rs_r2);
+    LIR* it = OpIT(kCondEq, "EE");
+    Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    // Go expensive route - UnlockObjectFromCode(obj);
+    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
+                       rs_rARM_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
+    OpEndIT(it);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kStoreLoad);
+  }
+}
+
+void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
+  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage reset_reg = AllocTemp();
+  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  LoadConstant(reset_reg, 0);
+  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+  FreeTemp(reset_reg);
+  StoreValue(rl_dest, rl_result);
+}
+
+/*
+ * Mark garbage collection card. Skip if the value we're storing is null.
+ */
+void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+  RegStorage reg_card_base = AllocTemp();
+  RegStorage reg_card_no = AllocTemp();
+  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
+  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
+  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
+  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+  FreeTemp(reg_card_base);
+  FreeTemp(reg_card_no);
+}
+
+void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  int spill_count = num_core_spills_ + num_fp_spills_;
+  /*
+   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
+   * mechanism know so it doesn't try to use any of them when
+   * expanding the frame or flushing.  This leaves the utility
+   * code with a single temp: r12.  This should be enough.
+   */
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
+  LockTemp(rs_r2);
+  LockTemp(rs_r3);
+
+  /*
+   * We can safely skip the stack overflow check if we're
+   * a leaf *and* our frame size < fudge factor.
+   */
+  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
+                            (static_cast<size_t>(frame_size_) <
+                            Thread::kStackOverflowReservedBytes));
+  NewLIR0(kPseudoMethodEntry);
+  if (!skip_overflow_check) {
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      /* Load stack limit */
+      Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+    }
+  }
+  /* Spill core callee saves */
+  NewLIR1(kThumb2Push, core_spill_mask_);
+  /* Need to spill any FP regs? */
+  if (num_fp_spills_) {
+    /*
+     * NOTE: fp spills are a little different from core spills in that
+     * they are pushed as a contiguous block.  When promoting from
+     * the fp set, we must allocate all singles from s16..highest-promoted.
+     */
+    NewLIR1(kThumb2VPushCS, num_fp_spills_);
+  }
+
+  const int spill_size = spill_count * 4;
+  const int frame_size_without_spills = frame_size_ - spill_size;
+  if (!skip_overflow_check) {
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      class StackOverflowSlowPath : public LIRSlowPath {
+       public:
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
+            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
+              sp_displace_(sp_displace) {
+        }
+        void Compile() OVERRIDE {
+          m2l_->ResetRegPool();
+          m2l_->ResetDefTracking();
+          GenerateTargetLabel(kPseudoThrowTarget);
+          if (restore_lr_) {
+            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
+          }
+          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
+          m2l_->ClobberCallerSave();
+          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
+          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
+          // codegen and target are in thumb2 mode.
+          // NOTE: native pointer.
+          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
+        }
+
+       private:
+        const bool restore_lr_;
+        const size_t sp_displace_;
+      };
+      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
+        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
+        // Need to restore LR since we used it as a temp.
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
+        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
+      } else {
+        // If the frame is small enough, we are guaranteed to have enough remaining space to
+        // handle signals on the user stack.
+        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
+      }
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      Load32Disp(rs_r12, 0, rs_r12);
+      MarkPossibleStackOverflowException();
+      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+    }
+  } else {
+    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+  }
+
+  FlushIns(ArgLocs, rl_method);
+
+  FreeTemp(rs_r0);
+  FreeTemp(rs_r1);
+  FreeTemp(rs_r2);
+  FreeTemp(rs_r3);
+}
+
+void Arm64Mir2Lir::GenExitSequence() {
+  int spill_count = num_core_spills_ + num_fp_spills_;
+  /*
+   * In the exit path, r0/r1 are live - make sure they aren't
+   * allocated by the register utilities as temps.
+   */
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
+
+  NewLIR0(kPseudoMethodExit);
+  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
+  /* Need to restore any FP callee saves? */
+  if (num_fp_spills_) {
+    NewLIR1(kThumb2VPopCS, num_fp_spills_);
+  }
+  if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
+    /* Unspill rARM_LR to rARM_PC */
+    core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
+    core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
+  }
+  NewLIR1(kThumb2Pop, core_spill_mask_);
+  if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
+    /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
+    NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
+  }
+}
+
+void Arm64Mir2Lir::GenSpecialExitSequence() {
+  NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
new file mode 100644
index 0000000..94c2563
--- /dev/null
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+
+#include "arm64_lir.h"
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+class Arm64Mir2Lir FINAL : public Mir2Lir {
+  public:
+    Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+
+    // Required for target - codegen helpers.
+    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                            RegLocation rl_dest, int lit);
+    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    LIR* CheckSuspendUsingLoad() OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<4> offset);
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
+                      int s_reg);
+    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
+                         OpSize size);
+    LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                             RegStorage r_dest, OpSize size, int s_reg);
+    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
+    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
+                          OpSize size);
+    LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                              RegStorage r_src, OpSize size, int s_reg);
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
+    // Required for target - register utilities.
+    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
+    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
+    RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage GetArgMappingToPhysicalReg(int arg_num);
+    RegLocation GetReturnAlt();
+    RegLocation GetReturnWideAlt();
+    RegLocation LocCReturn();
+    RegLocation LocCReturnDouble();
+    RegLocation LocCReturnFloat();
+    RegLocation LocCReturnWide();
+    uint64_t GetRegMaskCommon(RegStorage reg);
+    void AdjustSpillMask();
+    void ClobberCallerSave();
+    void FreeCallTemps();
+    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
+    void LockCallTemps();
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
+    void CompilerInitializeRegAlloc();
+    RegStorage AllocPreservedDouble(int s_reg);
+
+    // Required for target - miscellaneous.
+    void AssembleLIR();
+    uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
+    int AssignInsnOffsets();
+    void AssignOffsets();
+    static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
+    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    const char* GetTargetInstFmt(int opcode);
+    const char* GetTargetInstName(int opcode);
+    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+    uint64_t GetPCUseDefEncoding();
+    uint64_t GetTargetInstFlags(int opcode);
+    int GetInsnSize(LIR* lir);
+    bool IsUnconditionalBranch(LIR* lir);
+
+    // Required for target - Dalvik-level generators.
+    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2);
+    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                     RegLocation rl_index, RegLocation rl_dest, int scale);
+    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                     RegLocation rl_src, int scale, bool card_mark);
+    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_shift);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2);
+    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                         RegLocation rl_src2);
+    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
+    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedSqrt(CallInfo* info);
+    bool GenInlinedPeek(CallInfo* info, OpSize size);
+    bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                   RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
+    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenDivZeroCheckWide(RegStorage reg);
+    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
+    void GenExitSequence();
+    void GenSpecialExitSequence();
+    void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
+    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
+    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
+    void GenSelect(BasicBlock* bb, MIR* mir);
+    void GenMemBarrier(MemBarrierKind barrier_kind);
+    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+    void GenMonitorExit(int opt_flags, RegLocation rl_src);
+    void GenMoveException(RegLocation rl_dest);
+    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                       int first_bit, int second_bit);
+    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
+    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
+    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+
+    // Required for target - single operation generators.
+    LIR* OpUnconditionalBranch(LIR* target);
+    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
+    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
+    LIR* OpCondBranch(ConditionCode cc, LIR* target);
+    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
+    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpIT(ConditionCode cond, const char* guide);
+    void OpEndIT(LIR* it);
+    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
+    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    LIR* OpReg(OpKind op, RegStorage r_dest_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
+    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
+    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
+    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
+    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
+    LIR* OpTestSuspend(LIR* target);
+    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+    LIR* OpVldm(RegStorage r_base, int count);
+    LIR* OpVstm(RegStorage r_base, int count);
+    void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+    void OpRegCopyWide(RegStorage dest, RegStorage src);
+    void OpTlsCmp(ThreadOffset<4> offset, int val);
+
+    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
+                          int s_reg);
+    LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+    LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                          int shift);
+    LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
+    static const ArmEncodingMap EncodingMap[kArmLast];
+    int EncodeShift(int code, int amount);
+    int ModifiedImmediate(uint32_t value);
+    ArmConditionCode ArmConditionEncoding(ConditionCode code);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
+
+  private:
+    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
+                                  ConditionCode ccode);
+    LIR* LoadFPConstantValue(int r_dest, int value);
+    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                          bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+    typedef struct {
+      OpKind op;
+      uint32_t shift;
+    } EasyMultiplyOp;
+    bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
+    bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
+    void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
diff --git a/compiler/dex/quick/arm64/create.sh b/compiler/dex/quick/arm64/create.sh
new file mode 100644
index 0000000..a3833bd
--- /dev/null
+++ b/compiler/dex/quick/arm64/create.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -e
+
+if [ ! -d ./arm ]; then
+  echo "Directory ./arm not found."
+  exit 1
+fi
+
+mkdir -p arm64
+dst=$(cd arm64 && pwd)
+cd arm/
+for f in *; do
+  cp "$f" "$dst/$(echo "$f" | sed 's/arm/arm64/g')"
+done
+
+sed -i 's,ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_,ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_,g' $dst/arm64_lir.h
+sed -i 's,ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_,ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_,g' $dst/codegen_arm64.h
+sed -i -e 's,ArmMir2Lir,Arm64Mir2Lir,g' -e 's,arm_lir.h,arm64_lir.h,g' -e 's,codegen_arm.h,codegen_arm64.h,g' $dst/*.h $dst/*.cc
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
new file mode 100644
index 0000000..9684283
--- /dev/null
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -0,0 +1,358 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
+                                 RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kThumbBkpt;
+  RegLocation rl_result;
+
+  /*
+   * Don't attempt to optimize register usage since these opcodes call out to
+   * the handlers.
+   */
+  switch (opcode) {
+    case Instruction::ADD_FLOAT_2ADDR:
+    case Instruction::ADD_FLOAT:
+      op = kThumb2Vadds;
+      break;
+    case Instruction::SUB_FLOAT_2ADDR:
+    case Instruction::SUB_FLOAT:
+      op = kThumb2Vsubs;
+      break;
+    case Instruction::DIV_FLOAT_2ADDR:
+    case Instruction::DIV_FLOAT:
+      op = kThumb2Vdivs;
+      break;
+    case Instruction::MUL_FLOAT_2ADDR:
+    case Instruction::MUL_FLOAT:
+      op = kThumb2Vmuls;
+      break;
+    case Instruction::REM_FLOAT_2ADDR:
+    case Instruction::REM_FLOAT:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturn(true);
+      StoreValue(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_FLOAT:
+      GenNegFloat(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  rl_src1 = LoadValue(rl_src1, kFPReg);
+  rl_src2 = LoadValue(rl_src2, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
+                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kThumbBkpt;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::ADD_DOUBLE:
+      op = kThumb2Vaddd;
+      break;
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE:
+      op = kThumb2Vsubd;
+      break;
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE:
+      op = kThumb2Vdivd;
+      break;
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE:
+      op = kThumb2Vmuld;
+      break;
+    case Instruction::REM_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturnWide(true);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_DOUBLE:
+      GenNegDouble(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kFPReg);
+  DCHECK(rl_src1.wide);
+  rl_src2 = LoadValueWide(rl_src2, kFPReg);
+  DCHECK(rl_src2.wide);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_result.wide);
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
+  int op = kThumbBkpt;
+  int src_reg;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::INT_TO_FLOAT:
+      op = kThumb2VcvtIF;
+      break;
+    case Instruction::FLOAT_TO_INT:
+      op = kThumb2VcvtFI;
+      break;
+    case Instruction::DOUBLE_TO_FLOAT:
+      op = kThumb2VcvtDF;
+      break;
+    case Instruction::FLOAT_TO_DOUBLE:
+      op = kThumb2VcvtFd;
+      break;
+    case Instruction::INT_TO_DOUBLE:
+      op = kThumb2VcvtF64S32;
+      break;
+    case Instruction::DOUBLE_TO_INT:
+      op = kThumb2VcvtDI;
+      break;
+    case Instruction::LONG_TO_DOUBLE: {
+      rl_src = LoadValueWide(rl_src, kFPReg);
+      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
+      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+      RegStorage tmp1 = AllocTempDouble();
+      RegStorage tmp2 = AllocTempDouble();
+
+      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
+      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
+      LoadConstantWide(tmp2, 0x41f0000000000000LL);
+      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
+      FreeTemp(tmp1);
+      FreeTemp(tmp2);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    }
+    case Instruction::FLOAT_TO_LONG:
+      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      return;
+    case Instruction::LONG_TO_FLOAT: {
+      rl_src = LoadValueWide(rl_src, kFPReg);
+      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
+      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+      // Allocate temp registers.
+      RegStorage high_val = AllocTempDouble();
+      RegStorage low_val = AllocTempDouble();
+      RegStorage const_val = AllocTempDouble();
+      // Long to double.
+      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
+      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
+      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
+      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
+      // Double to float.
+      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
+      // Free temp registers.
+      FreeTemp(high_val);
+      FreeTemp(low_val);
+      FreeTemp(const_val);
+      // Store result.
+      StoreValue(rl_dest, rl_result);
+      return;
+    }
+    case Instruction::DOUBLE_TO_LONG:
+      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  if (rl_src.wide) {
+    rl_src = LoadValueWide(rl_src, kFPReg);
+    src_reg = rl_src.reg.GetReg();
+  } else {
+    rl_src = LoadValue(rl_src, kFPReg);
+    src_reg = rl_src.reg.GetReg();
+  }
+  if (rl_dest.wide) {
+    rl_result = EvalLoc(rl_dest, kFPReg, true);
+    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    rl_result = EvalLoc(rl_dest, kFPReg, true);
+    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
+                                     bool is_double) {
+  LIR* target = &block_label_list_[bb->taken];
+  RegLocation rl_src1;
+  RegLocation rl_src2;
+  if (is_double) {
+    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = mir_graph_->GetSrc(mir, 0);
+    rl_src2 = mir_graph_->GetSrc(mir, 1);
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  NewLIR0(kThumb2Fmstat);
+  ConditionCode ccode = mir->meta.ccode;
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      break;
+    case kCondLt:
+      if (gt_bias) {
+        ccode = kCondMi;
+      }
+      break;
+    case kCondLe:
+      if (gt_bias) {
+        ccode = kCondLs;
+      }
+      break;
+    case kCondGt:
+      if (gt_bias) {
+        ccode = kCondHi;
+      }
+      break;
+    case kCondGe:
+      if (gt_bias) {
+        ccode = kCondUge;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCondBranch(ccode, target);
+}
+
+
+void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
+                          RegLocation rl_src1, RegLocation rl_src2) {
+  bool is_double = false;
+  int default_result = -1;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::CMPL_FLOAT:
+      is_double = false;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_FLOAT:
+      is_double = false;
+      default_result = 1;
+      break;
+    case Instruction::CMPL_DOUBLE:
+      is_double = true;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_DOUBLE:
+      is_double = true;
+      default_result = 1;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  if (is_double) {
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  DCHECK(!rl_result.reg.IsFloat());
+  NewLIR0(kThumb2Fmstat);
+
+  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
+  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
+          ModifiedImmediate(-default_result));  // Must not alter ccodes
+  OpEndIT(it);
+
+  it = OpIT(kCondEq, "");
+  LoadConstant(rl_result.reg, 0);
+  OpEndIT(it);
+
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValue(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  LIR *branch;
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  NewLIR0(kThumb2Fmstat);
+  branch = NewLIR2(kThumbBCond, 0, kArmCondEq);
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt));
+  NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg());
+  NewLIR1(kThumbBlxR, r_tgt.GetReg());
+  NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg());
+  branch->target = NewLIR0(kPseudoTargetLabel);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
new file mode 100644
index 0000000..11fb765
--- /dev/null
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -0,0 +1,1460 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Thumb2 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/array.h"
+
+namespace art {
+
+LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
+  OpRegReg(kOpCmp, src1, src2);
+  return OpCondBranch(cond, target);
+}
+
+/*
+ * Generate a Thumb2 IT instruction, which can nullify up to
+ * four subsequent instructions based on a condition and its
+ * inverse.  The condition applies to the first instruction, which
+ * is executed if the condition is met.  The string "guide" consists
+ * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
+ * A "T" means the instruction is executed if the condition is
+ * met, and an "E" means the instruction is executed if the condition
+ * is not met.
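+ *
+ * For example (an illustrative reading, not from the original comment):
+ * OpIT(kCondEq, "TE") covers three instructions; the first two execute
+ * only if EQ holds, while the third executes only if it does not.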
+ */
+LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
+  int mask;
+  int mask3 = 0;
+  int mask2 = 0;
+  int mask1 = 0;
+  ArmConditionCode code = ArmConditionEncoding(ccode);
+  int cond_bit = code & 1;
+  int alt_bit = cond_bit ^ 1;
+
+  // Note: case fallthroughs intentional
+  switch (strlen(guide)) {
+    case 3:
+      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
+    case 2:
+      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
+    case 1:
+      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
+      break;
+    case 0:
+      break;
+    default:
+      LOG(FATAL) << "OAT: bad case in OpIT";
+  }
+  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
+       (1 << (3 - strlen(guide)));
+  return NewLIR2(kThumb2It, code, mask);
+}
+
+void Arm64Mir2Lir::OpEndIT(LIR* it) {
+  // TODO: use the 'it' pointer to do some checks with the LIR, for example
+  //       we could check that the number of instructions matches the mask
+  //       in the IT instruction.
+  CHECK(it != nullptr);
+  GenBarrier();
+}
+
+/*
+ * 64-bit 3way compare function.
+ *     mov   rX, #-1
+ *     cmp   op1hi, op2hi
+ *     blt   done
+ *     bgt   flip
+ *     sub   rX, op1lo, op2lo (treat as unsigned)
+ *     beq   done
+ *     ite   hi
+ *     mov(hi)   rX, #-1
+ *     mov(!hi)  rX, #1
+ * flip:
+ *     neg   rX
+ * done:
+ */
+void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  LIR* target1;
+  LIR* target2;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  RegStorage t_reg = AllocTemp();
+  LoadConstant(t_reg, -1);
+  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
+  LIR* branch1 = OpCondBranch(kCondLt, NULL);
+  LIR* branch2 = OpCondBranch(kCondGt, NULL);
+  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
+  LIR* branch3 = OpCondBranch(kCondEq, NULL);
+
+  LIR* it = OpIT(kCondHi, "E");
+  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
+  LoadConstant(t_reg, 1);
+  OpEndIT(it);
+
+  target2 = NewLIR0(kPseudoTargetLabel);
+  OpRegReg(kOpNeg, t_reg, t_reg);
+
+  target1 = NewLIR0(kPseudoTargetLabel);
+
+  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
+  rl_temp.reg.SetReg(t_reg.GetReg());
+  StoreValue(rl_dest, rl_temp);
+  FreeTemp(t_reg);
+
+  branch1->target = target1;
+  branch2->target = target2;
+  branch3->target = branch1->target;
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                          int64_t val, ConditionCode ccode) {
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  DCHECK_GE(ModifiedImmediate(val_lo), 0);
+  DCHECK_GE(ModifiedImmediate(val_hi), 0);
+  LIR* taken = &block_label_list_[bb->taken];
+  LIR* not_taken = &block_label_list_[bb->fall_through];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegStorage low_reg = rl_src1.reg.GetLow();
+  RegStorage high_reg = rl_src1.reg.GetHigh();
+
+  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
+    RegStorage t_reg = AllocTemp();
+    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
+    FreeTemp(t_reg);
+    OpCondBranch(ccode, taken);
+    return;
+  }
+
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
+      break;
+    case kCondLt:
+      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondUlt;
+      break;
+    case kCondLe:
+      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondUge;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+}
+
+void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  ConditionCode ccode = mir->meta.ccode;
+  if (mir->ssa_rep->num_uses == 1) {
+    // CONST case
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    // Change kCondNe to kCondEq for the special cases below.
+    if (ccode == kCondNe) {
+      ccode = kCondEq;
+      std::swap(true_val, false_val);
+    }
+    bool cheap_false_val = InexpensiveConstantInt(false_val);
+    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
+      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
+      LoadConstant(rl_result.reg, false_val);
+      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
+      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      LIR* it = OpIT(kCondLs, "");
+      LoadConstant(rl_result.reg, false_val);
+      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      LIR* it = OpIT(ccode, "E");
+      LoadConstant(rl_result.reg, true_val);
+      LoadConstant(rl_result.reg, false_val);
+      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+    } else {
+      // Unlikely case - could be tuned.
+      RegStorage t_reg1 = AllocTemp();
+      RegStorage t_reg2 = AllocTemp();
+      LoadConstant(t_reg1, true_val);
+      LoadConstant(t_reg2, false_val);
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      LIR* it = OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, t_reg1);
+      OpRegCopy(rl_result.reg, t_reg2);
+      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+    }
+  } else {
+    // MOVE case
+    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegImm(kOpCmp, rl_src.reg, 0);
+    LIR* it = nullptr;
+    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
+      it = OpIT(NegateComparison(ccode), "");
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
+      it = OpIT(ccode, "");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+    } else {  // Normal - select between the two.
+      it = OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    }
+    OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
+  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+  // Normalize such that if either operand is constant, src2 will be constant.
+  ConditionCode ccode = mir->meta.ccode;
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    // Do special compare/branch against simple const operand if not already in registers.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if ((rl_temp.location != kLocPhysReg) &&
+        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
+      return;
+    }
+  }
+  LIR* taken = &block_label_list_[bb->taken];
+  LIR* not_taken = &block_label_list_[bb->fall_through];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
+  switch (ccode) {
+    case kCondEq:
+      OpCondBranch(kCondNe, not_taken);
+      break;
+    case kCondNe:
+      OpCondBranch(kCondNe, taken);
+      break;
+    case kCondLt:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondUlt;
+      break;
+    case kCondLe:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondUge;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
+  OpCondBranch(ccode, taken);
+}
+
+/*
+ * Generate a register comparison to an immediate and branch.  Caller
+ * is responsible for setting branch target field.
+ */
+LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
+  LIR* branch;
+  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
+  /*
+   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
+   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
+   * branch forward to a slow path, they will frequently not reach - and thus have to
+   * be converted to a long form during assembly (which will trigger another assembly
+   * pass).  Here we estimate the branch distance for checks, and if large directly
+   * generate the long form in an attempt to avoid an extra assembly pass.
+   * TODO: consider interspersing slowpaths in code following unconditional branches.
+   */
+  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
+  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
+  if (!skip && reg.Low8() && (check_value == 0) &&
+     ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
+    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
+                     reg.GetReg(), 0);
+  } else {
+    OpRegImm(kOpCmp, reg, check_value);
+    branch = NewLIR2(kThumbBCond, 0, arm_cond);
+  }
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
+  LIR* res;
+  int opcode;
+  // If src or dest is a pair, we'll be using low reg.
+  if (r_dest.IsPair()) {
+    r_dest = r_dest.GetLow();
+  }
+  if (r_src.IsPair()) {
+    r_src = r_src.GetLow();
+  }
+  if (r_dest.IsFloat() || r_src.IsFloat())
+    return OpFpRegCopy(r_dest, r_src);
+  if (r_dest.Low8() && r_src.Low8())
+    opcode = kThumbMovRR;
+  else if (!r_dest.Low8() && !r_src.Low8())
+    opcode = kThumbMovRR_H2H;
+  else if (r_dest.Low8())
+    opcode = kThumbMovRR_H2L;
+  else
+    opcode = kThumbMovRR_L2H;
+  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
+    res->flags.is_nop = true;
+  }
+  return res;
+}
+
+void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
+}
+
+void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    bool dest_fp = r_dest.IsFloat();
+    bool src_fp = r_src.IsFloat();
+    DCHECK(r_dest.Is64Bit());
+    DCHECK(r_src.Is64Bit());
+    if (dest_fp) {
+      if (src_fp) {
+        OpRegCopy(r_dest, r_src);
+      } else {
+        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
+      }
+    } else {
+      if (src_fp) {
+        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
+      } else {
+        // Handle overlap
+        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+          DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        } else {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        }
+      }
+    }
+  }
+}
+
+// Table of magic divisors
+struct MagicTable {
+  uint32_t magic;
+  uint32_t shift;
+  DividePattern pattern;
+};
+
+static const MagicTable magic_table[] = {
+  {0, 0, DivideNone},        // 0
+  {0, 0, DivideNone},        // 1
+  {0, 0, DivideNone},        // 2
+  {0x55555556, 0, Divide3},  // 3
+  {0, 0, DivideNone},        // 4
+  {0x66666667, 1, Divide5},  // 5
+  {0x2AAAAAAB, 0, Divide3},  // 6
+  {0x92492493, 2, Divide7},  // 7
+  {0, 0, DivideNone},        // 8
+  {0x38E38E39, 1, Divide5},  // 9
+  {0x66666667, 2, Divide5},  // 10
+  {0x2E8BA2E9, 1, Divide5},  // 11
+  {0x2AAAAAAB, 1, Divide5},  // 12
+  {0x4EC4EC4F, 2, Divide5},  // 13
+  {0x92492493, 3, Divide7},  // 14
+  {0x88888889, 3, Divide7},  // 15
+};
+
+// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
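+//
+// Worked example (a sketch of the Divide3 pattern below): for lit == 3 the
+// magic value is 0x55555556 == ceil(2^32 / 3), so the sequence is roughly
+//   smull  r_lo, r_hi, r_magic, src    // r_hi = hi32(src * magic)
+//   sub    result, r_hi, src, asr #31  // sign correction for negative src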
+bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
+  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
+    return false;
+  }
+  DividePattern pattern = magic_table[lit].pattern;
+  if (pattern == DivideNone) {
+    return false;
+  }
+
+  RegStorage r_magic = AllocTemp();
+  LoadConstant(r_magic, magic_table[lit].magic);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage r_hi = AllocTemp();
+  RegStorage r_lo = AllocTemp();
+
+  // rl_dest and rl_src might overlap.
+  // Reuse r_hi to save the div result for the remainder case.
+  RegStorage r_div_result = is_div ? rl_result.reg : r_hi;
+
+  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
+  switch (pattern) {
+    case Divide3:
+      OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
+      break;
+    case Divide5:
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
+                       EncodeShift(kArmAsr, magic_table[lit].shift));
+      break;
+    case Divide7:
+      OpRegReg(kOpAdd, r_hi, rl_src.reg);
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
+                       EncodeShift(kArmAsr, magic_table[lit].shift));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected pattern: " << pattern;
+  }
+
+  if (!is_div) {
+    // div_result = src / lit
+    // tmp1 = div_result * lit
+    // dest = src - tmp1
+    RegStorage tmp1 = r_lo;
+    EasyMultiplyOp ops[2];
+
+    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
+    DCHECK_NE(canEasyMultiply, false);
+
+    GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
+    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
+  }
+
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+// Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
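+//
+// Illustrative cases: lit == 8 yields (src << 3), lit == 9 yields
+// src + (src << 3), and lit == 7 yields (src << 3) - src (kOpRsub).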
+bool Arm64Mir2Lir::GetEasyMultiplyOp(int lit, Arm64Mir2Lir::EasyMultiplyOp* op) {
+  if (IsPowerOfTwo(lit)) {
+    op->op = kOpLsl;
+    op->shift = LowestSetBit(lit);
+    return true;
+  }
+
+  if (IsPowerOfTwo(lit - 1)) {
+    op->op = kOpAdd;
+    op->shift = LowestSetBit(lit - 1);
+    return true;
+  }
+
+  if (IsPowerOfTwo(lit + 1)) {
+    op->op = kOpRsub;
+    op->shift = LowestSetBit(lit + 1);
+    return true;
+  }
+
+  op->op = kOpInvalid;
+  op->shift = 0;
+  return false;
+}
+
+// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
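+//
+// For example, lit == 10 factors as 5 * 2: ops[0] becomes kOpAdd with shift 2
+// (tmp = src + (src << 2), i.e. 5 * src) and ops[1] becomes kOpLsl with
+// shift 1 (dest = tmp << 1, i.e. 10 * src).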
+bool Arm64Mir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
+  if (GetEasyMultiplyOp(lit, &ops[0])) {
+    ops[1].op = kOpInvalid;
+    ops[1].shift = 0;
+    return true;
+  }
+
+  int lit1 = lit;
+  uint32_t shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpLsl;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  lit1 = lit - 1;
+  shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpAdd;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  lit1 = lit + 1;
+  shift = LowestSetBit(lit1);
+  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
+    ops[1].op = kOpRsub;
+    ops[1].shift = shift;
+    return true;
+  }
+
+  return false;
+}
+
+// Generate instructions to do the multiply.
+// An additional temporary register is required if two instructions
+// are needed and src/dest overlap.
+void Arm64Mir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
+  // tmp1 = ( src << shift1) + [ src | -src | 0 ]
+  // dest = (tmp1 << shift2) + [ src | -src | 0 ]
+
+  RegStorage r_tmp1;
+  if (ops[1].op == kOpInvalid) {
+    r_tmp1 = r_dest;
+  } else if (r_dest.GetReg() != r_src.GetReg()) {
+    r_tmp1 = r_dest;
+  } else {
+    r_tmp1 = AllocTemp();
+  }
+
+  switch (ops[0].op) {
+    case kOpLsl:
+      OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
+      break;
+    case kOpAdd:
+      OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
+      break;
+    case kOpRsub:
+      OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
+      break;
+    default:
+      DCHECK_EQ(ops[0].op, kOpInvalid);
+      break;
+  }
+
+  switch (ops[1].op) {
+    case kOpInvalid:
+      return;
+    case kOpLsl:
+      OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
+      break;
+    case kOpAdd:
+      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
+      break;
+    case kOpRsub:
+      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
+      break;
+  }
+}
+
+bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
+  EasyMultiplyOp ops[2];
+
+  if (!GetEasyMultiplyTwoOps(lit, ops)) {
+    return false;
+  }
+
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  // Put the literal in a temp.
+  RegStorage lit_temp = AllocTemp();
+  LoadConstant(lit_temp, lit);
+  // Use the generic case for div/rem with arg2 in a register.
+  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
+  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
+  FreeTemp(lit_temp);
+
+  return rl_result;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
+                                  bool is_div) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (is_div) {
+    // Simple case, use sdiv instruction.
+    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
+  } else {
+    // Remainder case, use the following code:
+    // temp = reg1 / reg2      - integer division
+    // temp = temp * reg2
+    // dest = reg1 - temp
+
+    RegStorage temp = AllocTemp();
+    OpRegRegReg(kOpDiv, temp, reg1, reg2);
+    OpRegReg(kOpMul, temp, reg2);
+    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
+    FreeTemp(temp);
+  }
+
+  return rl_result;
+}
+
+bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  RegLocation rl_src1 = info->args[0];
+  RegLocation rl_src2 = info->args[1];
+  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
+  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
+  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
+  OpEndIT(it);
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (size == k64) {
+    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
+    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+    } else {
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+    }
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_src_value = info->args[2];  // [size] value
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  if (size == k64) {
+    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
+    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
+  }
+  return true;
+}
+
+void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
+  LOG(FATAL) << "Unexpected use of OpLea for Arm";
+}
+
+void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
+  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
+}
+
+bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  // Unused - RegLocation rl_src_unsafe = info->args[0];
+  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
+  RegLocation rl_src_offset = info->args[2];  // long low
+  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  RegLocation rl_src_expected = info->args[4];  // int, long or Object
+  // If is_long, high half is in info->args[5]
+  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
+  // If is_long, high half is in info->args[7]
+  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
+
+  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
+  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
+  // r_ptr (1), new_value (2), expected (2) and ldrexd result (2). If neither expected nor
+  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
+  // into the same temps, reducing the number of required temps down to 5. We shall work
+  // around the potentially locked temp by using LR for r_ptr, unconditionally.
+  // TODO: Pass information about the need for more temps to the stack frame generation
+  // code so that we can rely on being able to allocate enough temps.
+  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
+  MarkTemp(rs_rARM_LR);
+  FreeTemp(rs_rARM_LR);
+  LockTemp(rs_rARM_LR);
+  bool load_early = true;
+  if (is_long) {
+    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
+        rl_src_expected.reg;
+    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
+        rl_src_new_value.reg;
+    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
+    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
+    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
+    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
+
+    if (!expected_is_good_reg && !new_value_is_good_reg) {
+      // Neither expected nor new_value is in a non-temp core register; load both late.
+      load_early = false;
+      // Make sure they are not in the temp regs and the load will not be skipped.
+      if (expected_is_core_reg) {
+        FlushRegWide(rl_src_expected.reg);
+        ClobberSReg(rl_src_expected.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
+        rl_src_expected.location = kLocDalvikFrame;
+      }
+      if (new_value_is_core_reg) {
+        FlushRegWide(rl_src_new_value.reg);
+        ClobberSReg(rl_src_new_value.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
+        rl_src_new_value.location = kLocDalvikFrame;
+      }
+    }
+  }
+
+  // Release store semantics, get the barrier out of the way.  TODO: revisit
+  GenMemBarrier(kStoreLoad);
+
+  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_new_value;
+  if (!is_long) {
+    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+  } else if (load_early) {
+    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+  }
+
+  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
+    // Mark card for object assuming new value is stored.
+    MarkGCCard(rl_new_value.reg, rl_object.reg);
+  }
+
+  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+
+  RegStorage r_ptr = rs_rARM_LR;
+  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
+
+  // Free now unneeded rl_object and rl_offset to give more temps.
+  ClobberSReg(rl_object.s_reg_low);
+  FreeTemp(rl_object.reg);
+  ClobberSReg(rl_offset.s_reg_low);
+  FreeTemp(rl_offset.reg);
+
+  RegLocation rl_expected;
+  if (!is_long) {
+    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+  } else if (load_early) {
+    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
+  } else {
+    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
+    rl_expected = rl_new_value;
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = tmp != 0;
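+  //
+  // Roughly, the loop emitted below for the 32-bit case (a sketch):
+  //   retry:
+  //     ldrex   tmp, [r_ptr]
+  //     subs    tmp, tmp, expected   ; tmp == 0 iff the old value matched
+  //     itt     eq
+  //     strexeq tmp, new_value, [r_ptr]
+  //     cmpeq   tmp, #1              ; strex writes 1 on failure
+  //     beq     retry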
+
+  RegStorage r_tmp = AllocTemp();
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+
+  LIR* it = nullptr;
+  if (is_long) {
+    RegStorage r_tmp_high = AllocTemp();
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
+    }
+    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
+    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
+    }
+    // Make sure we use ORR that sets the ccode
+    if (r_tmp.Low8() && r_tmp_high.Low8()) {
+      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
+    } else {
+      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
+    }
+    FreeTemp(r_tmp_high);  // Now unneeded
+
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    it = OpIT(kCondEq, "T");
+    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
+
+  } else {
+    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    it = OpIT(kCondEq, "T");
+    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+  }
+
+  // Still one conditional left from OpIT(kCondEq, "T") from either branch
+  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
+  OpEndIT(it);
+
+  OpCondBranch(kCondEq, target);
+
+  if (!load_early) {
+    FreeTemp(rl_expected.reg);  // Now unneeded.
+  }
+
+  // result := (tmp != 0) ? 0 : 1;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  it = OpIT(kCondUlt, "");
+  LoadConstant(rl_result.reg, 0); /* cc */
+  FreeTemp(r_tmp);  // Now unneeded.
+  OpEndIT(it);     // Barrier to terminate OpIT.
+
+  StoreValue(rl_dest, rl_result);
+
+  // Now, restore lr to its non-temp status.
+  Clobber(rs_rARM_LR);
+  UnmarkTemp(rs_rARM_LR);
+  return true;
+}
+
+LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+}
+
+LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
+  return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
+}
+
+LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
+  return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
+}
+
+void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
+                                               RegLocation rl_result, int lit,
+                                               int first_bit, int second_bit) {
+  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
+                   EncodeShift(kArmLsl, second_bit - first_bit));
+  if (first_bit != 0) {
+    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
+  }
+}
+
+void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
+  DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
+  RegStorage t_reg = AllocTemp();
+  NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
+  FreeTemp(t_reg);
+  GenDivZeroCheck(kCondEq);
+}
+
+// Test suspend flag, return target of taken suspend branch
+LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
+  NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
+  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+}
+
+// Decrement register and branch on condition
+LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
+  // Combine sub & test using sub setflags encoding here
+  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  return OpCondBranch(c_code, target);
+}
+
+void Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+#if ANDROID_SMP != 0
+  // Start off by using the last LIR as the barrier; if it is not suitable, generate a new one.
+  LIR* barrier = last_lir_insn_;
+
+  int dmb_flavor;
+  // TODO: revisit Arm barrier kinds
+  switch (barrier_kind) {
+    case kLoadStore: dmb_flavor = kISH; break;
+    case kLoadLoad: dmb_flavor = kISH; break;
+    case kStoreStore: dmb_flavor = kISHST; break;
+    case kStoreLoad: dmb_flavor = kISH; break;
+    default:
+      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
+      dmb_flavor = kSY;  // quiet gcc.
+      break;
+  }
+
+  // If the same barrier already exists, don't generate another.
+  if (barrier == nullptr || barrier->opcode != kThumb2Dmb ||
+      barrier->operands[0] != dmb_flavor) {
+    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+  }
+
+  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
+#endif
+}
+
+void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage z_reg = AllocTemp();
+  LoadConstantNoClobber(z_reg, 0);
+  // Check for destructive overlap
+  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+    RegStorage t_reg = AllocTemp();
+    // Save the high half before the low-half subtract below clobbers it.
+    OpRegCopy(t_reg, rl_src.reg.GetHigh());
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
+    FreeTemp(t_reg);
+  } else {
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
+  }
+  FreeTemp(z_reg);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+    /*
+     * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
+     * dest     = src1.lo * src2.lo;
+     * tmp1    += src1.lo * src2.hi;
+     * dest.hi += tmp1;
+     *
+     * To pull off inline multiply, we have a worst-case requirement of 7 temporary
+     * registers.  Normally for Arm, we get 5.  We can get to 6 by including
+     * lr in the temp set.  The only problematic case is all operands and result are
+     * distinct, and none have been promoted.  In that case, we can succeed by aggressively
+     * freeing operand temp registers after they are no longer needed.  All other cases
+     * can proceed normally.  We'll just punt on the case of the result having a misaligned
+     * overlap with either operand and send that case to a runtime handler.
+     */
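+    // The identity used below, with operands split into 32-bit halves and all
+    // products taken modulo 2^64 (a sketch):
+    //   (a_hi:a_lo) * (b_hi:b_lo) = a_lo*b_lo + ((a_hi*b_lo + a_lo*b_hi) << 32)
+    // UMULL produces the full 64-bit a_lo*b_lo; MUL and MLA accumulate the two
+    // cross products, which only affect the high word.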
+    RegLocation rl_result;
+    if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
+      ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
+      FlushAllRegs();
+      CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
+      rl_result = GetReturnWide(false);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    }
+
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+
+    int reg_status = 0;
+    RegStorage res_lo;
+    RegStorage res_hi;
+    bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
+        !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
+    bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
+    bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
+    // Check if rl_dest is *not* either operand and we have enough temp registers.
+    if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
+        (dest_promoted || src1_promoted || src2_promoted)) {
+      // In this case, we do not need to manually allocate temp registers for result.
+      rl_result = EvalLoc(rl_dest, kCoreReg, true);
+      res_lo = rl_result.reg.GetLow();
+      res_hi = rl_result.reg.GetHigh();
+    } else {
+      res_lo = AllocTemp();
+      if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
+        // In this case, there are enough temp registers to allocate for the result.
+        res_hi = AllocTemp();
+        reg_status = 1;
+      } else {
+        // In this case, all temps are now allocated.
+        // res_hi will be allocated after we can free src1_hi.
+        reg_status = 2;
+      }
+    }
+
+    // Temporarily add LR to the temp pool, and assign it to tmp1
+    MarkTemp(rs_rARM_LR);
+    FreeTemp(rs_rARM_LR);
+    RegStorage tmp1 = rs_rARM_LR;
+    LockTemp(rs_rARM_LR);
+
+    if (rl_src1.reg == rl_src2.reg) {
+      DCHECK(res_hi.Valid());
+      DCHECK(res_lo.Valid());
+      NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
+      NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
+              rl_src1.reg.GetLowReg());
+      OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
+    } else {
+      NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
+      if (reg_status == 2) {
+        DCHECK(!res_hi.Valid());
+        DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
+        DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
+        FreeTemp(rl_src1.reg.GetHigh());
+        res_hi = AllocTemp();
+      }
+      DCHECK(res_hi.Valid());
+      DCHECK(res_lo.Valid());
+      NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
+              rl_src1.reg.GetLowReg());
+      NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
+              tmp1.GetReg());
+      NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
+      if (reg_status == 2) {
+        // Clobber rl_src1 since it was corrupted.
+        FreeTemp(rl_src1.reg);
+        Clobber(rl_src1.reg);
+      }
+    }
+
+    // Now, restore lr to its non-temp status.
+    FreeTemp(tmp1);
+    Clobber(rs_rARM_LR);
+    UnmarkTemp(rs_rARM_LR);
+
+    if (reg_status != 0) {
+      // We had manually allocated registers for rl_result.
+      // Now construct a RegLocation.
+      rl_result = GetReturnWide(false);  // Just using as a template.
+      rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
+    }
+
+    StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
+}
+
+void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
+}
+
+void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
+}
+
+void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                           RegLocation rl_src2) {
+  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
+}
+
+void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  LOG(FATAL) << "Unexpected use of genXoLong for Arm";
+}
+
+/*
+ * Generate array load
+ */
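+// The overall shape, roughly: null-check the array; load array->length when a
+// range check is needed; bounds-check the index; then either fold a constant
+// index into the displacement, form base + (index << scale) for wide/fp
+// values, or use a scaled indexed load for everything else.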
+void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  bool constant_index = rl_index.is_const;
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  if (rl_dest.wide) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  if (rl_dest.wide || rl_dest.fp || constant_index) {
+    RegStorage reg_ptr;
+    if (constant_index) {
+      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
+    } else {
+      // No special indexed operation, lea + load w/ displacement
+      reg_ptr = AllocTemp();
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+      FreeTemp(rl_index.reg);
+    }
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+    if (rl_dest.wide) {
+      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG);
+      MarkPossibleNullPointerException(opt_flags);
+      if (!constant_index) {
+        FreeTemp(reg_ptr);
+      }
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
+      MarkPossibleNullPointerException(opt_flags);
+      if (!constant_index) {
+        FreeTemp(reg_ptr);
+      }
+      StoreValue(rl_dest, rl_result);
+    }
+  } else {
+    // Offset base, then use indexed load
+    RegStorage reg_ptr = AllocTemp();
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    FreeTemp(rl_array.reg);
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+    FreeTemp(reg_ptr);
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store
+ */
+void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  bool constant_index = rl_index.is_const;
+
+  int data_offset;
+  if (size == k64 || size == kDouble) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset.
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  RegStorage reg_ptr;
+  bool allocated_reg_ptr_temp = false;
+  if (constant_index) {
+    reg_ptr = rl_array.reg;
+  } else if (IsTemp(rl_array.reg) && !card_mark) {
+    Clobber(rl_array.reg);
+    reg_ptr = rl_array.reg;
+  } else {
+    allocated_reg_ptr_temp = true;
+    reg_ptr = AllocTemp();
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    // NOTE: max live temps (4) here.
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  /* at this point, reg_ptr points to array, 2 live temps */
+  if (rl_src.wide || rl_src.fp || constant_index) {
+    if (rl_src.wide) {
+      rl_src = LoadValueWide(rl_src, reg_class);
+    } else {
+      rl_src = LoadValue(rl_src, reg_class);
+    }
+    if (!constant_index) {
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+    }
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+
+    if (rl_src.wide) {
+      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg);
+    } else {
+      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
+    }
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    /* reg_ptr -> array data */
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    rl_src = LoadValue(rl_src, reg_class);
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+  }
+  if (allocated_reg_ptr_temp) {
+    FreeTemp(reg_ptr);
+  }
+  if (card_mark) {
+    MarkGCCard(rl_src.reg, rl_array.reg);
+  }
+}
+
+
+void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  // Per spec, we only care about low 6 bits of shift amount.
+  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  }
+  if (BadOverlap(rl_src, rl_dest)) {
+    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+    return;
+  }
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
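+  // For 0 < n < 32 the cases below follow the usual two-register recipe,
+  // sketched here for SHL (SHR and USHR mirror it):
+  //   res_hi = (src_hi << n) | (src_lo >> (32 - n))
+  //   res_lo =  src_lo << n
+  // For n == 32 the halves are simply moved; for n > 32 only one shifted
+  // half survives and the other is filled with zero or sign bits.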
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      if (shift_amount == 1) {
+        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
+        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
+      } else if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
+        LoadConstant(rl_result.reg.GetLow(), 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
+        LoadConstant(rl_result.reg.GetLow(), 0);
+      } else {
+        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
+                         EncodeShift(kArmLsr, 32 - shift_amount));
+        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(t_reg);
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+        LoadConstant(rl_result.reg.GetHigh(), 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
+        LoadConstant(rl_result.reg.GetHigh(), 0);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(t_reg);
+        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+    if (!rl_src2.is_const) {
+      // Don't bother with special handling for subtract from immediate.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      return;
+    }
+  } else {
+    // Normalize
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      std::swap(rl_src1, rl_src2);
+    }
+  }
+  if (BadOverlap(rl_src1, rl_dest)) {
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    return;
+  }
+  DCHECK(rl_src2.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  uint32_t val_lo = Low32Bits(val);
+  uint32_t val_hi = High32Bits(val);
+  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
+  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+
+  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
+        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+        return;
+      }
+      break;
+    default:
+      break;
+  }
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
+      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
+      break;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
+        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      }
+      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
+        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      }
+      break;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      break;
+    case Instruction::AND_LONG:
+    case Instruction::AND_LONG_2ADDR:
+      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
+        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      }
+      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
+        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      }
+      break;
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
+      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
new file mode 100644
index 0000000..233e9c2
--- /dev/null
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -0,0 +1,803 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "codegen_arm64.h"
+
+#include <inttypes.h>
+
+#include <string>
+
+#include "dex/compiler_internals.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+// TODO: rework this when C++11 support allows.
+static const RegStorage core_regs_arr[] =
+    {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
+     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static const RegStorage sp_regs_arr[] =
+    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
+     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
+     rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
+     rs_fr31};
+static const RegStorage dp_regs_arr[] =
+    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
+     rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+static const RegStorage reserved_regs_arr[] =
+    {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+static const RegStorage sp_temps_arr[] =
+    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
+     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
+static const RegStorage dp_temps_arr[] =
+    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
+
+static const std::vector<RegStorage> core_regs(core_regs_arr,
+    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
+static const std::vector<RegStorage> sp_regs(sp_regs_arr,
+    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
+static const std::vector<RegStorage> dp_regs(dp_regs_arr,
+    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
+static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
+    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
+static const std::vector<RegStorage> core_temps(core_temps_arr,
+    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr,
+    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr,
+    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+
+RegLocation Arm64Mir2Lir::LocCReturn() {
+  return arm_loc_c_return;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnWide() {
+  return arm_loc_c_return_wide;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnFloat() {
+  return arm_loc_c_return_float;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnDouble() {
+  return arm_loc_c_return_double;
+}
+
+// Return a target-dependent special register.
+RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+  RegStorage res_reg = RegStorage::InvalidReg();
+  switch (reg) {
+    case kSelf: res_reg = rs_rARM_SELF; break;
+    case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
+    case kLr: res_reg =  rs_rARM_LR; break;
+    case kPc: res_reg =  rs_rARM_PC; break;
+    case kSp: res_reg =  rs_rARM_SP; break;
+    case kArg0: res_reg = rs_r0; break;
+    case kArg1: res_reg = rs_r1; break;
+    case kArg2: res_reg = rs_r2; break;
+    case kArg3: res_reg = rs_r3; break;
+    case kFArg0: res_reg = rs_r0; break;
+    case kFArg1: res_reg = rs_r1; break;
+    case kFArg2: res_reg = rs_r2; break;
+    case kFArg3: res_reg = rs_r3; break;
+    case kRet0: res_reg = rs_r0; break;
+    case kRet1: res_reg = rs_r1; break;
+    case kInvokeTgt: res_reg = rs_rARM_LR; break;
+    case kHiddenArg: res_reg = rs_r12; break;
+    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
+    case kCount: res_reg = RegStorage::InvalidReg(); break;
+  }
+  return res_reg;
+}
+
+RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+  switch (arg_num) {
+    case 0:
+      return rs_r1;
+    case 1:
+      return rs_r2;
+    case 2:
+      return rs_r3;
+    default:
+      return RegStorage::InvalidReg();
+  }
+}
+
+/*
+ * Decode the register id.
+ */
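+// For example (a sketch of the mapping below): core register r1 sets bit 1;
+// the single s3 sets bit 16 + 3; the double d1 aliases two singles and sets
+// bits 18 and 19 (seed 0x3 shifted by 16 + 2*1).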
+uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
+  uint64_t seed;
+  int shift;
+  int reg_id = reg.GetRegNum();
+  /* Each double register is equal to a pair of single-precision FP registers */
+  if (reg.IsDouble()) {
+    seed = 0x3;
+    reg_id = reg_id << 1;
+  } else {
+    seed = 1;
+  }
+  /* FP register starts at bit position 16 */
+  shift = reg.IsFloat() ? kArmFPReg0 : 0;
+  /* Expand the double register id into single offset */
+  shift += reg_id;
+  return (seed << shift);
+}
+
+uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
+  return ENCODE_ARM_REG_PC;
+}
+
+// Thumb2 specific setup.  TODO: inline?
+void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK(!lir->flags.use_def_invalid);
+
+  int opcode = lir->opcode;
+
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
+                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
+                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_DEF_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_LIST1) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
+      }
+    }
+
+    if (flags & REG_USE_PC) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
+    }
+
+    /* Conservatively treat the IT block */
+    if (flags & IS_IT) {
+      lir->u.m.def_mask = ENCODE_ALL;
+    }
+
+    if (flags & REG_USE_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_LIST1) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
+      }
+    }
+    /* Fixup for kThumbPush/lr and kThumbPop/pc */
+    if (opcode == kThumbPush || opcode == kThumbPop) {
+      uint64_t r8Mask = GetRegMaskCommon(rs_r8);
+      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
+        lir->u.m.use_mask &= ~r8Mask;
+        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
+      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
+        lir->u.m.def_mask &= ~r8Mask;
+        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
+      }
+    }
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
+  }
+}
+
+ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) {
+  ArmConditionCode res;
+  switch (ccode) {
+    case kCondEq: res = kArmCondEq; break;
+    case kCondNe: res = kArmCondNe; break;
+    case kCondCs: res = kArmCondCs; break;
+    case kCondCc: res = kArmCondCc; break;
+    case kCondUlt: res = kArmCondCc; break;
+    case kCondUge: res = kArmCondCs; break;
+    case kCondMi: res = kArmCondMi; break;
+    case kCondPl: res = kArmCondPl; break;
+    case kCondVs: res = kArmCondVs; break;
+    case kCondVc: res = kArmCondVc; break;
+    case kCondHi: res = kArmCondHi; break;
+    case kCondLs: res = kArmCondLs; break;
+    case kCondGe: res = kArmCondGe; break;
+    case kCondLt: res = kArmCondLt; break;
+    case kCondGt: res = kArmCondGt; break;
+    case kCondLe: res = kArmCondLe; break;
+    case kCondAl: res = kArmCondAl; break;
+    case kCondNv: res = kArmCondNv; break;
+    default:
+      LOG(FATAL) << "Bad condition code " << ccode;
+      res = static_cast<ArmConditionCode>(0);  // Quiet gcc
+  }
+  return res;
+}
+
+static const char* core_reg_names[16] = {
+  "r0",
+  "r1",
+  "r2",
+  "r3",
+  "r4",
+  "r5",
+  "r6",
+  "r7",
+  "r8",
+  "rSELF",
+  "r10",
+  "r11",
+  "r12",
+  "sp",
+  "lr",
+  "pc",
+};
+
+
+static const char* shift_names[4] = {
+  "lsl",
+  "lsr",
+  "asr",
+  "ror"};
+
+/* Decode and print a ARM register name */
+static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) {
+  int i;
+  bool printed = false;
+  buf[0] = 0;
+  for (i = 0; i < 16; i++, vector >>= 1) {
+    if (vector & 0x1) {
+      int reg_id = i;
+      if (opcode == kThumbPush && i == 8) {
+        reg_id = rs_rARM_LR.GetRegNum();
+      } else if (opcode == kThumbPop && i == 8) {
+        reg_id = rs_rARM_PC.GetRegNum();
+      }
+      if (printed) {
+        snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
+      } else {
+        printed = true;
+        snprintf(buf, buf_size, "r%d", reg_id);
+      }
+    }
+  }
+  return buf;
+}
+
+static char* DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) {
+  snprintf(buf, buf_size, "s%d", base);
+  for (int i = 1; i < count; i++) {
+    snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i);
+  }
+  return buf;
+}
+
+static int32_t ExpandImmediate(int value) {
+  int32_t mode = (value & 0xf00) >> 8;
+  uint32_t bits = value & 0xff;
+  switch (mode) {
+    case 0:
+      return bits;
+    case 1:
+      return (bits << 16) | bits;
+    case 2:
+      return (bits << 24) | (bits << 8);
+    case 3:
+      return (bits << 24) | (bits << 16) | (bits << 8) | bits;
+    default:
+      break;
+  }
+  bits = (bits | 0x80) << 24;
+  return bits >> (((value & 0xf80) >> 7) - 8);
+}
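+// For example (a sketch of the decoding above): value 0x3ab has mode 3 and
+// bits 0xab, so it expands to 0xabababab; modes 4 and up decode as a rotated
+// 8-bit constant with an implicit leading one bit.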
+
+const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+                         "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+/*
+ * Interpret a format string and build a string no longer than size
+ * See format key in Assemble.c.
+ */
+std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
+  std::string buf;
+  int i;
+  const char* fmt_end = &fmt[strlen(fmt)];
+  char tbuf[256];
+  const char* name;
+  char nc;
+  while (fmt < fmt_end) {
+    int operand;
+    if (*fmt == '!') {
+      fmt++;
+      DCHECK_LT(fmt, fmt_end);
+      nc = *fmt++;
+      if (nc == '!') {
+        strcpy(tbuf, "!");
+      } else {
+         DCHECK_LT(fmt, fmt_end);
+         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
+         operand = lir->operands[nc-'0'];
+         switch (*fmt++) {
+           case 'H':
+             if (operand != 0) {
+               snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2);
+             } else {
+               strcpy(tbuf, "");
+             }
+             break;
+           case 'B':
+             switch (operand) {
+               case kSY:
+                 name = "sy";
+                 break;
+               case kST:
+                 name = "st";
+                 break;
+               case kISH:
+                 name = "ish";
+                 break;
+               case kISHST:
+                 name = "ishst";
+                 break;
+               case kNSH:
+                 name = "nsh";
+                 break;
+               case kNSHST:
+                 name = "shst";
+                 break;
+               default:
+                 name = "DecodeError2";
+                 break;
+             }
+             strcpy(tbuf, name);
+             break;
+           case 'b':
+             strcpy(tbuf, "0000");
+             for (i = 3; i >= 0; i--) {
+               tbuf[i] += operand & 1;
+               operand >>= 1;
+             }
+             break;
+           case 'n':
+             operand = ~ExpandImmediate(operand);
+             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
+             break;
+           case 'm':
+             operand = ExpandImmediate(operand);
+             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
+             break;
+           case 's':
+             snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand));
+             break;
+           case 'S':
+             snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand));
+             break;
+           case 'h':
+             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
+             break;
+           case 'M':
+           case 'd':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand);
+             break;
+           case 'C':
+             operand = RegStorage::RegNum(operand);
+             DCHECK_LT(operand, static_cast<int>(
+                 sizeof(core_reg_names)/sizeof(core_reg_names[0])));
+             snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
+             break;
+           case 'E':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
+             break;
+           case 'F':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
+             break;
+           case 'c':
+             strcpy(tbuf, cc_names[operand]);
+             break;
+           case 't':
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                 lir->target);
+             break;
+           case 'u': {
+             int offset_1 = lir->operands[0];
+             int offset_2 = NEXT_LIR(lir)->operands[0];
+             uintptr_t target =
+                 (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) &
+                 ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
+                 0xfffffffc;
+             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target));
+             break;
+          }
+
+           /* Nothing to print for BLX_2 */
+           case 'v':
+             strcpy(tbuf, "see above");
+             break;
+           case 'R':
+             DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf));
+             break;
+           case 'P':
+             DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf));
+             break;
+           case 'Q':
+             DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf));
+             break;
+           default:
+             strcpy(tbuf, "DecodeError1");
+             break;
+        }
+        buf += tbuf;
+      }
+    } else {
+       buf += *fmt++;
+    }
+  }
+  return buf;
+}
+
+void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) {
+  char buf[256];
+  buf[0] = 0;
+
+  if (mask == ENCODE_ALL) {
+    strcpy(buf, "all");
+  } else {
+    char num[8];
+    int i;
+
+    for (i = 0; i < kArmRegEnd; i++) {
+      if (mask & (1ULL << i)) {
+        snprintf(num, arraysize(num), "%d ", i);
+        strcat(buf, num);
+      }
+    }
+
+    if (mask & ENCODE_CCODE) {
+      strcat(buf, "cc ");
+    }
+    if (mask & ENCODE_FP_STATUS) {
+      strcat(buf, "fpcc ");
+    }
+
+    /* Memory bits */
+    if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
+    }
+    if (mask & ENCODE_LITERAL) {
+      strcat(buf, "lit ");
+    }
+
+    if (mask & ENCODE_HEAP_REF) {
+      strcat(buf, "heap ");
+    }
+    if (mask & ENCODE_MUST_NOT_ALIAS) {
+      strcat(buf, "noalias ");
+    }
+  }
+  if (buf[0]) {
+    LOG(INFO) << prefix << ": " << buf;
+  }
+}
+
+bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
+  return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
+}
+
+Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+    : Mir2Lir(cu, mir_graph, arena) {
+  // Sanity check - make sure encoding map lines up.
+  for (int i = 0; i < kArmLast; i++) {
+    if (Arm64Mir2Lir::EncodingMap[i].opcode != i) {
+      LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
+                 << " is wrong: expecting " << i << ", seeing "
+                 << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
+    }
+  }
+}
+
+Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena) {
+  return new Arm64Mir2Lir(cu, mir_graph, arena);
+}
+
+// Alloc a pair of core registers, or a double.
+RegStorage Arm64Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempDouble();
+  } else {
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    return RegStorage::MakeRegPair(low_reg, high_reg);
+  }
+}
+
+RegStorage Arm64Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg))
+    return AllocTempSingle();
+  return AllocTemp();
+}
+
+void Arm64Mir2Lir::CompilerInitializeRegAlloc() {
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
+                                        core_temps, sp_temps, dp_temps);
+
+  // Target-specific adjustments.
+
+  // Alias single precision floats to appropriate half of overlapping double.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    int sp_reg_num = info->GetReg().GetRegNum();
+    int dp_reg_num = sp_reg_num >> 1;
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // Double precision register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect the single's master storage to the overlapping double's info.
+    info->SetMaster(dp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+    if (sp_reg_num & 1) {
+      // For odd singles, change to use the high word of the backing double.
+      info->SetStorageMask(0x2);
+    }
+  }
+
+  // TODO: re-enable this when we can safely save r4 over the suspension code path.
+  bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
+  if (no_suspend) {
+    GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
+  }
+
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust when we roll to hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 0;
+  reg_pool_->next_dp_reg_ = 0;
+}
+
+void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
+  DCHECK(rl_keep.wide);
+  DCHECK(rl_free.wide);
+  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
+      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
+      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
+      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
+    // No overlap, free.
+    FreeTemp(rl_free.reg);
+  }
+}
+
+/*
+ * TUNING: is this a true leaf?  We can't just use METHOD_IS_LEAF to determine
+ * that, as some instructions might call out to C/assembly helper functions.
+ * Until that machinery is in place, always spill lr.
+ */
+
+void Arm64Mir2Lir::AdjustSpillMask() {
+  core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum());
+  num_core_spills_++;
+}
+
+/*
+ * Mark a callee-save fp register as promoted.  Note that
+ * vpush/vpop use contiguous register lists, so we must
+ * include any holes in the mask.  Associate holes with
+ * Dalvik register INVALID_VREG (0xFFFFU).
+ */
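+// For example (assuming ARM_FP_CALLEE_SAVE_BASE is 16, i.e. s16 is the first
+// AAPCS callee-save single): promoting a Dalvik vreg to s19 pads indices 0..2
+// of fp_vmap_table_ with INVALID_VREG, records the vreg at index 3, and
+// widens fp_spill_mask_ to cover s16..s19 so vpush/vpop get a contiguous list.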
+void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
+  DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
+  int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
+  // Ensure fp_vmap_table is large enough
+  int table_size = fp_vmap_table_.size();
+  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
+    fp_vmap_table_.push_back(INVALID_VREG);
+  }
+  // Add the current mapping
+  fp_vmap_table_[adjusted_reg_num] = v_reg;
+  // Size of fp_vmap_table is high-water mark, use to set mask
+  num_fp_spills_ = fp_vmap_table_.size();
+  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
+}
+
+void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
+  // TEMP: perform as 2 singles.
+  int reg_num = reg.GetRegNum() << 1;
+  RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
+  RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
+  MarkPreservedSingle(v_reg, lo);
+  MarkPreservedSingle(v_reg + 1, hi);
+}
+
+/* Clobber all regs that might be used by an external C call */
+void Arm64Mir2Lir::ClobberCallerSave() {
+  // TODO: rework this - it's gotten even more ugly.
+  Clobber(rs_r0);
+  Clobber(rs_r1);
+  Clobber(rs_r2);
+  Clobber(rs_r3);
+  Clobber(rs_r12);
+  Clobber(rs_r14lr);
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
+  Clobber(rs_fr8);
+  Clobber(rs_fr9);
+  Clobber(rs_fr10);
+  Clobber(rs_fr11);
+  Clobber(rs_fr12);
+  Clobber(rs_fr13);
+  Clobber(rs_fr14);
+  Clobber(rs_fr15);
+  Clobber(rs_dr0);
+  Clobber(rs_dr1);
+  Clobber(rs_dr2);
+  Clobber(rs_dr3);
+  Clobber(rs_dr4);
+  Clobber(rs_dr5);
+  Clobber(rs_dr6);
+  Clobber(rs_dr7);
+}
+
+RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
+  RegLocation res = LocCReturnWide();
+  res.reg.SetLowReg(rs_r2.GetReg());
+  res.reg.SetHighReg(rs_r3.GetReg());
+  Clobber(rs_r2);
+  Clobber(rs_r3);
+  MarkInUse(rs_r2);
+  MarkInUse(rs_r3);
+  MarkWide(res.reg);
+  return res;
+}
+
+RegLocation Arm64Mir2Lir::GetReturnAlt() {
+  RegLocation res = LocCReturn();
+  res.reg.SetReg(rs_r1.GetReg());
+  Clobber(rs_r1);
+  MarkInUse(rs_r1);
+  return res;
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::LockCallTemps() {
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
+  LockTemp(rs_r2);
+  LockTemp(rs_r3);
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::FreeCallTemps() {
+  FreeTemp(rs_r0);
+  FreeTemp(rs_r1);
+  FreeTemp(rs_r2);
+  FreeTemp(rs_r3);
+}
+
+RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
+  LoadWordDisp(rs_rARM_SELF, offset.Int32Value(), rs_rARM_LR);
+  return rs_rARM_LR;
+}
+
+LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
+  RegStorage tmp = rs_r0;
+  Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
+  LIR* load2 = Load32Disp(tmp, 0, tmp);
+  return load2;
+}
+
+uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[opcode].flags;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[opcode].name;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[opcode].fmt;
+}
+
+/*
+ * Somewhat messy code here.  We want to allocate a pair of contiguous
+ * physical single-precision floating point registers starting with
+ * an even-numbered reg.  It is possible that the paired s_reg (s_reg+1)
+ * has already been allocated - try to fit if possible.  Fail to
+ * allocate if we can't meet the requirements for the pair of
+ * s_reg <= sX[even] && (s_reg+1) <= sX+1.
+ */
+// TODO: needs rewrite to support non-backed 64-bit float regs.
+RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) {
+  RegStorage res;
+  int v_reg = mir_graph_->SRegToVReg(s_reg);
+  int p_map_idx = SRegToPMap(s_reg);
+  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
+    // Upper reg is already allocated.  Can we fit?
+    int high_reg = promotion_map_[p_map_idx+1].FpReg;
+    if ((high_reg & 1) == 0) {
+      // High reg is even - fail.
+      return res;  // Invalid.
+    }
+    // Is the low reg of the pair free?
+    // FIXME: rework.
+    RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
+    if (p->InUse() || p->IsTemp()) {
+      // Already allocated or not preserved - fail.
+      return res;  // Invalid.
+    }
+    // OK - good to go.
+    res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
+    p->MarkInUse();
+    MarkPreservedSingle(v_reg, p->GetReg());
+  } else {
+    /*
+     * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
+     * different underlying physical registers.
+     */
+    GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
+    for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+      if (!info->IsTemp() && !info->InUse()) {
+        res = info->GetReg();
+        info->MarkInUse();
+        MarkPreservedDouble(v_reg, info->GetReg());
+        break;
+      }
+    }
+  }
+  if (res.Valid()) {
+    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg();
+    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg();
+  }
+  return res;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
new file mode 100644
index 0000000..d66b834
--- /dev/null
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -0,0 +1,1149 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+/* This file contains codegen for the Thumb ISA. */
+
+static int32_t EncodeImmSingle(int32_t value) {
+  int32_t res;
+  int32_t bit_a =  (value & 0x80000000) >> 31;
+  int32_t not_bit_b = (value & 0x40000000) >> 30;
+  int32_t bit_b =  (value & 0x20000000) >> 29;
+  int32_t b_smear =  (value & 0x3e000000) >> 25;
+  int32_t slice =   (value & 0x01f80000) >> 19;
+  int32_t zeroes =  (value & 0x0007ffff);
+  if (zeroes != 0)
+    return -1;
+  if (bit_b) {
+    if ((not_bit_b != 0) || (b_smear != 0x1f))
+      return -1;
+  } else {
+    if ((not_bit_b != 1) || (b_smear != 0x0))
+      return -1;
+  }
+  res = (bit_a << 7) | (bit_b << 6) | slice;
+  return res;
+}
+
+/*
+ * Determine whether value can be encoded as a Thumb2 double-precision
+ * floating point immediate.  If not, return -1.  If so, return the
+ * encoded 8-bit value.
+ */
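+// For reference: +2.0 (0x4000000000000000) encodes to 0x00 and +1.0
+// (0x3ff0000000000000) encodes to 0x70, while 0.1 has non-zero low bits
+// and is rejected (-1).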
+static int32_t EncodeImmDouble(int64_t value) {
+  int32_t res;
+  int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
+  int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
+  int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
+  int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
+  int32_t slice =  (value & INT64_C(0x003f000000000000)) >> 48;
+  uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
+  if (zeroes != 0ull)
+    return -1;
+  if (bit_b) {
+    if ((not_bit_b != 0) || (b_smear != 0xff))
+      return -1;
+  } else {
+    if ((not_bit_b != 1) || (b_smear != 0x0))
+      return -1;
+  }
+  res = (bit_a << 7) | (bit_b << 6) | slice;
+  return res;
+}
+
+LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int value) {
+  DCHECK(RegStorage::IsSingle(r_dest));
+  if (value == 0) {
+    // TODO: we need better info about the target CPU.  A vector exclusive-or
+    //       would probably be better here if we could rely on its existence.
+    // Load an immediate +2.0 (which encodes to 0)
+    NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
+    // +0.0 = +2.0 - +2.0
+    return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
+  } else {
+    int encoded_imm = EncodeImmSingle(value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
+    }
+  }
+  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&literal_list_, value);
+  }
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
+                          r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
+}
+
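+// Count the number of leading zero bits in val (32 when val is 0) via a
+// simple binary search, avoiding any dependence on a CLZ instruction.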
+static int LeadingZeros(uint32_t val) {
+  uint32_t alt;
+  int32_t n;
+  int32_t count;
+
+  count = 16;
+  n = 32;
+  do {
+    alt = val >> count;
+    if (alt != 0) {
+      n = n - count;
+      val = alt;
+    }
+    count >>= 1;
+  } while (count);
+  return n - val;
+}
+
+/*
+ * Determine whether value can be encoded as a Thumb2 modified
+ * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
+ */
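+// Examples: 0x000000ab -> 0x0ab, 0x00ab00ab -> 0x1ab, 0xab00ab00 -> 0x2ab,
+// 0xabababab -> 0x3ab; 0x00000101 needs a run of more than 8 bits and is
+// rejected (-1).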
+int Arm64Mir2Lir::ModifiedImmediate(uint32_t value) {
+  int32_t z_leading;
+  int32_t z_trailing;
+  uint32_t b0 = value & 0xff;
+
+  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
+  if (value <= 0xFF)
+    return b0;  // 0:000:a:bcdefgh
+  if (value == ((b0 << 16) | b0))
+    return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
+  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
+    return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
+  b0 = (value >> 8) & 0xff;
+  if (value == ((b0 << 24) | (b0 << 8)))
+    return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
+  /* Can we do it with rotation? */
+  z_leading = LeadingZeros(value);
+  z_trailing = 32 - LeadingZeros(~value & (value - 1));
+  /* A run of eight or fewer active bits? */
+  if ((z_leading + z_trailing) < 24)
+    return -1;  /* No - bail */
+  /* left-justify the constant, discarding msb (known to be 1) */
+  value <<= z_leading + 1;
+  /* Create bcdefgh */
+  value >>= 25;
+  /* Put it all together */
+  return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
+  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
+  return EncodeImmSingle(value) >= 0;
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
+  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
+  return EncodeImmDouble(value) >= 0;
+}
+
+/*
+ * Load an immediate using a shortcut if possible; otherwise
+ * grab from the per-translation literal pool.
+ *
+ * No additional register clobbering operation is performed. Use this version
+ * when:
+ * 1) r_dest is freshly returned from AllocTemp, or
+ * 2) the codegen is under fixed register usage.
+ */
+LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
+  LIR* res;
+  int mod_imm;
+
+  if (r_dest.IsFloat()) {
+    return LoadFPConstantValue(r_dest.GetReg(), value);
+  }
+
+  /* See if the value can be constructed cheaply */
+  if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
+    return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
+  }
+  /* Check Modified immediate special cases */
+  mod_imm = ModifiedImmediate(value);
+  if (mod_imm >= 0) {
+    res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
+    return res;
+  }
+  mod_imm = ModifiedImmediate(~value);
+  if (mod_imm >= 0) {
+    res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
+    return res;
+  }
+  /* 16-bit immediate? */
+  if ((value & 0xffff) == value) {
+    res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
+    return res;
+  }
+  /* Do a low/high pair */
+  res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
+  NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
+  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched  during assembly */);
+  res->target = target;
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
+  // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly
+  // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is
+  // substantial.
+  LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */,
+                        ArmConditionEncoding(cc));
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
+  ArmOpcode opcode = kThumbBkpt;
+  switch (op) {
+    case kOpBlx:
+      opcode = kThumbBlxR;
+      break;
+    case kOpBx:
+      opcode = kThumbBx;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode " << op;
+  }
+  return NewLIR1(opcode, r_dest_src.GetReg());
+}
+
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
+                               int shift) {
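+  // The 16-bit Thumb encodings are only available when there is no shift and
+  // both operands are low registers (r0-r7).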
+  bool thumb_form = ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
+  ArmOpcode opcode = kThumbBkpt;
+  switch (op) {
+    case kOpAdc:
+      opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
+      break;
+    case kOpAnd:
+      opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
+      break;
+    case kOpBic:
+      opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
+      break;
+    case kOpCmn:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
+      break;
+    case kOpCmp:
+      if (thumb_form)
+        opcode = kThumbCmpRR;
+      else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
+        opcode = kThumbCmpHH;
+      else if ((shift == 0) && r_dest_src1.Low8())
+        opcode = kThumbCmpLH;
+      else if (shift == 0)
+        opcode = kThumbCmpHL;
+      else
+        opcode = kThumb2CmpRR;
+      break;
+    case kOpXor:
+      opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
+      break;
+    case kOpMov:
+      DCHECK_EQ(shift, 0);
+      if (r_dest_src1.Low8() && r_src2.Low8())
+        opcode = kThumbMovRR;
+      else if (!r_dest_src1.Low8() && !r_src2.Low8())
+        opcode = kThumbMovRR_H2H;
+      else if (r_dest_src1.Low8())
+        opcode = kThumbMovRR_H2L;
+      else
+        opcode = kThumbMovRR_L2H;
+      break;
+    case kOpMul:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
+      break;
+    case kOpMvn:
+      opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
+      break;
+    case kOpNeg:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
+      break;
+    case kOpOr:
+      opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
+      break;
+    case kOpSbc:
+      opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
+      break;
+    case kOpTst:
+      opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
+      break;
+    case kOpLsl:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
+      break;
+    case kOpLsr:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
+      break;
+    case kOpAsr:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
+      break;
+    case kOpRor:
+      DCHECK_EQ(shift, 0);
+      opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
+      break;
+    case kOpAdd:
+      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
+      break;
+    case kOpSub:
+      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+      break;
+    case kOpRev:
+      DCHECK_EQ(shift, 0);
+      if (!thumb_form) {
+        // Binary, but rm is encoded twice.
+        return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      }
+      opcode = kThumbRev;
+      break;
+    case kOpRevsh:
+      DCHECK_EQ(shift, 0);
+      if (!thumb_form) {
+        // Binary, but rm is encoded twice.
+        return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      }
+      opcode = kThumbRevsh;
+      break;
+    case kOp2Byte:
+      DCHECK_EQ(shift, 0);
+      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
+    case kOp2Short:
+      DCHECK_EQ(shift, 0);
+      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+    case kOp2Char:
+      DCHECK_EQ(shift, 0);
+      return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
+    return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
+  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+    if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
+      return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
+    } else {
+      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
+    }
+  } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
+    return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
+  } else {
+    LOG(FATAL) << "Unexpected encoding operand count";
+    return NULL;
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
+  return OpRegRegShift(op, r_dest_src1, r_src2, 0);
+}
+
+LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
+  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                  RegStorage r_src2, int shift) {
+  ArmOpcode opcode = kThumbBkpt;
+  bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
+  switch (op) {
+    case kOpAdd:
+      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
+      break;
+    case kOpSub:
+      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+      break;
+    case kOpRsub:
+      opcode = kThumb2RsubRRR;
+      break;
+    case kOpAdc:
+      opcode = kThumb2AdcRRR;
+      break;
+    case kOpAnd:
+      opcode = kThumb2AndRRR;
+      break;
+    case kOpBic:
+      opcode = kThumb2BicRRR;
+      break;
+    case kOpXor:
+      opcode = kThumb2EorRRR;
+      break;
+    case kOpMul:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2MulRRR;
+      break;
+    case kOpDiv:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2SdivRRR;
+      break;
+    case kOpOr:
+      opcode = kThumb2OrrRRR;
+      break;
+    case kOpSbc:
+      opcode = kThumb2SbcRRR;
+      break;
+    case kOpLsl:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2LslRRR;
+      break;
+    case kOpLsr:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2LsrRRR;
+      break;
+    case kOpAsr:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2AsrRRR;
+      break;
+    case kOpRor:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2RorRRR;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
+    return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
+  } else {
+    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
+    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
+  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
+}
+
+LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
+  LIR* res;
+  bool neg = (value < 0);
+  int32_t abs_value = (neg) ? -value : value;
+  ArmOpcode opcode = kThumbBkpt;
+  ArmOpcode alt_opcode = kThumbBkpt;
+  bool all_low_regs = r_dest.Low8() && r_src1.Low8();
+  int32_t mod_imm = ModifiedImmediate(value);
+
+  switch (op) {
+    case kOpLsl:
+      if (all_low_regs)
+        return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      else
+        return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpLsr:
+      if (all_low_regs)
+        return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      else
+        return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpAsr:
+      if (all_low_regs)
+        return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      else
+        return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpRor:
+      return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpAdd:
+      if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
+        return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
+      } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
+          (value <= 1020) && ((value & 0x3) == 0)) {
+        return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
+      }
+      // Note: intentional fallthrough
+    case kOpSub:
+      if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
+        if (op == kOpAdd)
+          opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
+        else
+          opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
+        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
+      }
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          op = (op == kOpAdd) ? kOpSub : kOpAdd;
+        }
+      }
+      if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) {
+        // This is deliberately used only if the modified immediate encoding is
+        // inadequate, since we sometimes actually use the flags for small
+        // values but not necessarily low regs.
+        if (op == kOpAdd)
+          opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
+        else
+          opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
+        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
+      }
+      if (op == kOpSub) {
+        opcode = kThumb2SubRRI8M;
+        alt_opcode = kThumb2SubRRR;
+      } else {
+        opcode = kThumb2AddRRI8M;
+        alt_opcode = kThumb2AddRRR;
+      }
+      break;
+    case kOpRsub:
+      opcode = kThumb2RsubRRI8M;
+      alt_opcode = kThumb2RsubRRR;
+      break;
+    case kOpAdc:
+      opcode = kThumb2AdcRRI8M;
+      alt_opcode = kThumb2AdcRRR;
+      break;
+    case kOpSbc:
+      opcode = kThumb2SbcRRI8M;
+      alt_opcode = kThumb2SbcRRR;
+      break;
+    case kOpOr:
+      opcode = kThumb2OrrRRI8M;
+      alt_opcode = kThumb2OrrRRR;
+      break;
+    case kOpAnd:
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(~value);
+        if (mod_imm >= 0) {
+          return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
+        }
+      }
+      opcode = kThumb2AndRRI8M;
+      alt_opcode = kThumb2AndRRR;
+      break;
+    case kOpXor:
+      opcode = kThumb2EorRRI8M;
+      alt_opcode = kThumb2EorRRR;
+      break;
+    case kOpMul:
+      // TUNING: power of 2, shift & add
+      mod_imm = -1;
+      alt_opcode = kThumb2MulRRR;
+      break;
+    case kOpCmp: {
+      LIR* res;
+      if (mod_imm >= 0) {
+        res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
+      } else {
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
+        } else {
+          RegStorage r_tmp = AllocTemp();
+          res = LoadConstant(r_tmp, value);
+          OpRegReg(kOpCmp, r_src1, r_tmp);
+          FreeTemp(r_tmp);
+        }
+      }
+      return res;
+    }
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+  }
+
+  if (mod_imm >= 0) {
+    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
+  } else {
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, value);
+    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
+      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+    else
+      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+    FreeTemp(r_scratch);
+    return res;
+  }
+}
+
+/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
+LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
+  bool neg = (value < 0);
+  int32_t abs_value = (neg) ? -value : value;
+  bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
+  ArmOpcode opcode = kThumbBkpt;
+  switch (op) {
+    case kOpAdd:
+      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
+        DCHECK_EQ((value & 0x3), 0);
+        return NewLIR1(kThumbAddSpI7, value >> 2);
+      } else if (short_form) {
+        opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
+      }
+      break;
+    case kOpSub:
+      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
+        DCHECK_EQ((value & 0x3), 0);
+        return NewLIR1(kThumbSubSpI7, value >> 2);
+      } else if (short_form) {
+        opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
+      }
+      break;
+    case kOpCmp:
+      if (!neg && short_form) {
+        opcode = kThumbCmpRI8;
+      } else {
+        short_form = false;
+      }
+      break;
+    default:
+      /* Punt to OpRegRegImm - if bad case catch it there */
+      short_form = false;
+      break;
+  }
+  if (short_form) {
+    return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
+  } else {
+    return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
+  }
+}
+
+LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
+  LIR* res = NULL;
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
+  if (r_dest.IsFloat()) {
+    DCHECK(!r_dest.IsPair());
+    if ((val_lo == 0) && (val_hi == 0)) {
+      // TODO: we need better info about the target CPU.  A vector exclusive-or
+      //       would probably be better here if we could rely on its existence.
+      // Load an immediate +2.0 (which encodes to 0)
+      NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
+      // +0.0 = +2.0 - +2.0
+      res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
+    } else {
+      int encoded_imm = EncodeImmDouble(value);
+      if (encoded_imm >= 0) {
+        res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
+      }
+    }
+  } else {
+    // NOTE: Arm32 assumption here.
+    DCHECK(r_dest.IsPair());
+    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
+      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
+      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
+    }
+  }
+  if (res == NULL) {
+    // No short form - load from the literal pool.
+    LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+    if (data_target == NULL) {
+      data_target = AddWideData(&literal_list_, val_lo, val_hi);
+    }
+    if (r_dest.IsFloat()) {
+      res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
+                   r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
+    } else {
+      DCHECK(r_dest.IsPair());
+      res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
+                   r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
+    }
+    SetMemRefType(res, true, kLiteral);
+    AppendLIR(res);
+  }
+  return res;
+}
+
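+// Pack a shift type and amount into the encoded shift operand used by the
+// register-register ALU forms: (amount & 0x1f) << 2 | code.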
+int Arm64Mir2Lir::EncodeShift(int code, int amount) {
+  return ((amount & 0x1f) << 2) | code;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+                                 int scale, OpSize size) {
+  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
+  LIR* load;
+  ArmOpcode opcode = kThumbBkpt;
+  bool thumb_form = (all_low_regs && (scale == 0));
+  RegStorage reg_ptr;
+
+  if (r_dest.IsFloat()) {
+    if (r_dest.IsSingle()) {
+      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
+      opcode = kThumb2Vldrs;
+      size = kSingle;
+    } else {
+      DCHECK(r_dest.IsDouble());
+      DCHECK((size == k64) || (size == kDouble));
+      opcode = kThumb2Vldrd;
+      size = kDouble;
+    }
+  } else {
+    if (size == kSingle)
+      size = k32;
+  }
+
+  switch (size) {
+    case kDouble:
+    // Intentional fall-through.
+    case kSingle:
+      reg_ptr = AllocTemp();
+      if (scale) {
+        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
+                EncodeShift(kArmLsl, scale));
+      } else {
+        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
+      }
+      load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
+      FreeTemp(reg_ptr);
+      return load;
+    case k32:
+    // Intentional fall-through.
+    case kReference:
+      opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
+      break;
+    case kUnsignedHalf:
+      opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
+      break;
+    case kSignedHalf:
+      opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
+      break;
+    case kUnsignedByte:
+      opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
+      break;
+    case kSignedByte:
+      opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+  if (thumb_form)
+    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
+  else
+    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+
+  return load;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+                                  int scale, OpSize size) {
+  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
+  LIR* store = NULL;
+  ArmOpcode opcode = kThumbBkpt;
+  bool thumb_form = (all_low_regs && (scale == 0));
+  RegStorage reg_ptr;
+
+  if (r_src.IsFloat()) {
+    if (r_src.IsSingle()) {
+      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
+      opcode = kThumb2Vstrs;
+      size = kSingle;
+    } else {
+      DCHECK(r_src.IsDouble());
+      DCHECK((size == k64) || (size == kDouble));
+      DCHECK_EQ((r_src.GetReg() & 0x1), 0);
+      opcode = kThumb2Vstrd;
+      size = kDouble;
+    }
+  } else {
+    if (size == kSingle)
+      size = k32;
+  }
+
+  switch (size) {
+    case kDouble:
+    // Intentional fall-through.
+    case kSingle:
+      reg_ptr = AllocTemp();
+      if (scale) {
+        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
+                EncodeShift(kArmLsl, scale));
+      } else {
+        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
+      }
+      store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
+      FreeTemp(reg_ptr);
+      return store;
+    case k32:
+    // Intentional fall-through.
+    case kReference:
+      opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
+      break;
+    case kUnsignedHalf:
+    // Intentional fall-through.
+    case kSignedHalf:
+      opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
+      break;
+    case kUnsignedByte:
+    // Intentional fall-through.
+    case kSignedByte:
+      opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+  if (thumb_form)
+    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
+  else
+    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+
+  return store;
+}
+
+/*
+ * Load value from base + displacement.  Optionally perform null check
+ * on base (which must have an associated s_reg and MIR).  If not
+ * performing null check, incoming MIR can be null.
+ */
+LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
+                                  OpSize size, int s_reg) {
+  LIR* load = NULL;
+  ArmOpcode opcode = kThumbBkpt;
+  bool short_form = false;
+  bool thumb2Form = (displacement < 4092 && displacement >= 0);
+  bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
+  int encoded_disp = displacement;
+  bool already_generated = false;
+  bool null_pointer_safepoint = false;
+  switch (size) {
+    case kDouble:
+    // Intentional fall-through.
+    case k64:
+      if (r_dest.IsFloat()) {
+        DCHECK(!r_dest.IsPair());
+        opcode = kThumb2Vldrd;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+      } else {
+        if (displacement <= 1020) {
+          load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
+                         displacement >> 2);
+        } else {
+          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
+          null_pointer_safepoint = true;
+          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
+        }
+        already_generated = true;
+      }
+      break;
+    case kSingle:
+    // Intentional fall-through.
+    case k32:
+    // Intentional fall-through.
+    case kReference:
+      if (r_dest.IsFloat()) {
+        opcode = kThumb2Vldrs;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+        break;
+      }
+      if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
+          (displacement >= 0)) {
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kThumbLdrPcRel;
+      } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
+                 (displacement >= 0)) {
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kThumbLdrSpRel;
+      } else if (all_low && displacement < 128 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x3), 0);
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kThumbLdrRRI5;
+      } else if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2LdrRRI12;
+      }
+      break;
+    case kUnsignedHalf:
+      if (all_low && displacement < 64 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x1), 0);
+        short_form = true;
+        encoded_disp >>= 1;
+        opcode = kThumbLdrhRRI5;
+      } else if (displacement < 4092 && displacement >= 0) {
+        short_form = true;
+        opcode = kThumb2LdrhRRI12;
+      }
+      break;
+    case kSignedHalf:
+      if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2LdrshRRI12;
+      }
+      break;
+    case kUnsignedByte:
+      if (all_low && displacement < 32 && displacement >= 0) {
+        short_form = true;
+        opcode = kThumbLdrbRRI5;
+      } else if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2LdrbRRI12;
+      }
+      break;
+    case kSignedByte:
+      if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2LdrsbRRI12;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (!already_generated) {
+    if (short_form) {
+      load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+    } else {
+      RegStorage reg_offset = AllocTemp();
+      LoadConstant(reg_offset, encoded_disp);
+      if (r_dest.IsFloat()) {
+        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+        OpRegReg(kOpAdd, reg_offset, r_base);
+        load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg);
+      } else {
+        load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
+      }
+      FreeTemp(reg_offset);
+    }
+  }
+
+  // TODO: in future may need to differentiate Dalvik accesses w/ spills
+  if (r_base == rs_rARM_SP) {
+    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  } else {
+     // We might need to generate a safepoint if we have two load instructions (wide or double).
+     if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+       MarkSafepointPC(load);
+     }
+  }
+  return load;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
+                              int s_reg) {
+  DCHECK(!((size == k64) || (size == kDouble)));
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
+  return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
+}
+
+LIR* Arm64Mir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
+                                  int s_reg) {
+  return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg);
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
+                                   OpSize size) {
+  LIR* store = NULL;
+  ArmOpcode opcode = kThumbBkpt;
+  bool short_form = false;
+  bool thumb2Form = (displacement < 4092 && displacement >= 0);
+  bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
+  int encoded_disp = displacement;
+  bool already_generated = false;
+  bool null_pointer_safepoint = false;
+  switch (size) {
+    case k64:
+    case kDouble:
+      if (!r_src.IsFloat()) {
+        if (displacement <= 1020) {
+          store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
+                          displacement >> 2);
+        } else {
+          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
+          null_pointer_safepoint = true;
+          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
+        }
+        already_generated = true;
+      } else {
+        DCHECK(!r_src.IsPair());
+        opcode = kThumb2Vstrd;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+      }
+      break;
+    case kSingle:
+    // Intentional fall-through.
+    case k32:
+    // Intentional fall-through.
+    case kReference:
+      if (r_src.IsFloat()) {
+        DCHECK(r_src.IsSingle());
+        opcode = kThumb2Vstrs;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+        break;
+      }
+      if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kThumbStrSpRel;
+      } else if (all_low && displacement < 128 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x3), 0);
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kThumbStrRRI5;
+      } else if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2StrRRI12;
+      }
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      if (all_low && displacement < 64 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x1), 0);
+        short_form = true;
+        encoded_disp >>= 1;
+        opcode = kThumbStrhRRI5;
+      } else if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2StrhRRI12;
+      }
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      if (all_low && displacement < 32 && displacement >= 0) {
+        short_form = true;
+        opcode = kThumbStrbRRI5;
+      } else if (thumb2Form) {
+        short_form = true;
+        opcode = kThumb2StrbRRI12;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+  if (!already_generated) {
+    if (short_form) {
+      store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+    } else {
+      RegStorage r_scratch = AllocTemp();
+      LoadConstant(r_scratch, encoded_disp);
+      if (r_src.IsFloat()) {
+        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+        OpRegReg(kOpAdd, r_scratch, r_base);
+        store = StoreBaseDispBody(r_scratch, 0, r_src, size);
+      } else {
+        store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+      }
+      FreeTemp(r_scratch);
+    }
+  }
+
+  // TODO: In future, may need to differentiate Dalvik & spill accesses
+  if (r_base == rs_rARM_SP) {
+    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(store);
+    }
+  }
+  return store;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                               OpSize size) {
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
+  DCHECK(!((size == k64) || (size == kDouble)));
+  return StoreBaseDispBody(r_base, displacement, r_src, size);
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
+  return StoreBaseDispBody(r_base, displacement, r_src, k64);
+}
+
+LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  int opcode;
+  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
+  if (r_dest.IsDouble()) {
+    opcode = kThumb2Vmovd;
+  } else {
+    if (r_dest.IsSingle()) {
+      opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
+    } else {
+      DCHECK(r_src.IsSingle());
+      opcode = kThumb2Fmrs;
+    }
+  }
+  LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
+    res->flags.is_nop = true;
+  }
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
+  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
+  LOG(FATAL) << "Unexpected use of OpMem for Arm";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                      int displacement, RegStorage r_src, OpSize size, int s_reg) {
+  LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
+  LOG(FATAL) << "Unexpected use of OpRegMem for Arm";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                     int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+  LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
+  return NULL;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 9f84e09..fbf8a0c 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -558,7 +558,7 @@
 static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
                                             unsigned int element_size) {
   // Align to natural pointer size.
-  offset = (offset + (element_size - 1)) & ~(element_size - 1);
+  offset = RoundUp(offset, element_size);
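+  // For power-of-two element sizes this matches the old mask arithmetic:
+  // RoundUp(x, n) == (x + n - 1) & ~(n - 1).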
   for (; lir != NULL; lir = lir->next) {
     lir->offset = offset;
     offset += element_size;
@@ -758,7 +758,7 @@
     tab_rec->offset = offset;
     offset += tab_rec->size;
     // word align
-    offset = (offset + 3) & ~3;
+    offset = RoundUp(offset, 4);
     }
   return offset;
 }
@@ -1027,7 +1027,7 @@
 
   UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
   CompiledMethod* result =
-      new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
+      new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
                          vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
   return result;
@@ -1049,14 +1049,13 @@
 
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
-  static const uint32_t kAlignMask = kStackAlignment - 1;
   uint32_t size = num_core_spills_ * GetBytesPerGprSpillLocation(cu_->instruction_set)
                   + num_fp_spills_ * GetBytesPerFprSpillLocation(cu_->instruction_set)
                   + sizeof(uint32_t)  // Filler.
                   + (cu_->num_regs + cu_->num_outs) * sizeof(uint32_t)
                   + GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
-  return (size + kAlignMask) & ~(kAlignMask);
+  return RoundUp(size, kStackAlignment);
 }
 
 /*
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index baae319..b26ab57 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -748,7 +748,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 0fc5c6e..7436e39 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1483,7 +1483,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4446f43..b747102 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -325,49 +325,60 @@
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
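+  // An equality test against a non-zero value clobbers its operand below
+  // (sub/sbb/or), so force it into temp registers we are free to modify.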
+  if (is_equality_test && val != 0) {
+    rl_src1 = ForceTempWide(rl_src1);
+  }
   RegStorage low_reg = rl_src1.reg.GetLow();
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
+  if (is_equality_test) {
+    // We can simplify the comparison when testing ==, != against 0.
+    if (val == 0) {
+      if (IsTemp(low_reg)) {
+        OpRegReg(kOpOr, low_reg, high_reg);
+        // We have now changed it; ignore the old values.
+        Clobber(rl_src1.reg);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+        FreeTemp(t_reg);
+      }
+      OpCondBranch(ccode, taken);
+      return;
+    }
+
+    // Need to compute the actual value for ==, !=.
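+    // sub low, val_lo followed by sbb high, val_hi computes (src - val);
+    // OR-ing the two halves sets the zero flag iff src == val.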
+    OpRegImm(kOpSub, low_reg, val_lo);
+    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+    OpRegReg(kOpOr, high_reg, low_reg);
+    Clobber(rl_src1.reg);
+  } else if (ccode == kCondLe || ccode == kCondGt) {
+    // Swap operands and condition code to prevent use of zero flag.
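+    // Computing val - src instead makes the signed flags valid for >= and <:
+    // src <= val iff val - src >= 0, and src > val iff val - src < 0.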
+    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+    LoadConstantWide(tmp, val);
+    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
+    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
+    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
+    FreeTemp(tmp);
+  } else {
+    // We can use a compare for the low word to set CF.
+    OpRegImm(kOpCmp, low_reg, val_lo);
+    if (IsTemp(high_reg)) {
+      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+      // We have now changed it; ignore the old values.
+      Clobber(rl_src1.reg);
+    } else {
+      // mov temp_reg, high_reg; sbb temp_reg, high_constant
+      RegStorage t_reg = AllocTemp();
+      OpRegCopy(t_reg, high_reg);
+      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
+      FreeTemp(t_reg);
+    }
   }
 
-  OpRegImm(kOpCmp, high_reg, val_hi);
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+  OpCondBranch(ccode, taken);
 }
 
 void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e5decc5..6817f14 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -336,7 +336,7 @@
     : profile_ok_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
-      compiler_(Compiler::Create(compiler_kind)),
+      compiler_(Compiler::Create(this, compiler_kind)),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
       freezing_constructor_lock_("freezing constructor lock"),
@@ -374,7 +374,7 @@
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
 
-  compiler_->Init(*this);
+  compiler_->Init();
 
   CHECK(!Runtime::Current()->IsStarted());
   if (!image_) {
@@ -433,7 +433,7 @@
     STLDeleteElements(&classes_to_patch_);
   }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
-  compiler_->UnInit(*this);
+  compiler_->UnInit();
 }
 
 CompilerTls* CompilerDriver::GetTls() {
@@ -1874,7 +1874,7 @@
         (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) {
       // Leaving this empty will trigger the generic JNI version
     } else {
-      compiled_method = compiler_->JniCompile(*this, access_flags, method_idx, dex_file);
+      compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
       CHECK(compiled_method != NULL);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
@@ -1883,9 +1883,8 @@
     bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return NULL.
-      compiled_method = compiler_->Compile(
-          *this, code_item, access_flags, invoke_type, class_def_idx,
-          method_idx, class_loader, dex_file);
+      compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx,
+                                           method_idx, class_loader, dex_file);
     }
     if (compiled_method == nullptr && dex_to_dex_compilation_level != kDontDexToDexCompile) {
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
@@ -1983,7 +1982,7 @@
                               OatWriter* oat_writer,
                               art::File* file)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host, *this);
+  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
 }
 void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set,
                                                 std::string* target_triple,
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 9f439eb..5a22170 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -52,7 +52,7 @@
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler,
+CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
                                             uint32_t access_flags, uint32_t method_idx,
                                             const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
@@ -60,7 +60,7 @@
   const bool is_static = (access_flags & kAccStatic) != 0;
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-  InstructionSet instruction_set = compiler.GetInstructionSet();
+  InstructionSet instruction_set = driver->GetInstructionSet();
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
   }
@@ -423,7 +423,7 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  return new CompiledMethod(compiler,
+  return new CompiledMethod(driver,
                             instruction_set,
                             managed_code,
                             frame_size,
@@ -536,7 +536,7 @@
 
 }  // namespace art
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& compiler,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* compiler,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file) {
   return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 2812700..df895ee 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -175,8 +175,8 @@
 }  // namespace llvm
 }  // namespace art
 
-static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver& driver) {
-  void *compiler_context = driver.GetCompilerContext();
+static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver* driver) {
+  void *compiler_context = driver->GetCompilerContext();
   CHECK(compiler_context != NULL);
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
@@ -187,20 +187,20 @@
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 
-  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(&driver,
-                                                                       driver.GetInstructionSet());
+  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(driver,
+                                                                       driver->GetInstructionSet());
 
-  driver.SetCompilerContext(compiler_llvm);
+  driver->SetCompilerContext(compiler_llvm);
 }
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver) {
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver) {
   delete ContextOf(driver);
-  driver.SetCompilerContext(NULL);
+  driver->SetCompilerContext(nullptr);
 }
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -213,13 +213,13 @@
 
   art::DexCompilationUnit dex_compilation_unit(
     NULL, class_loader, class_linker, dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags, driver->GetVerifiedMethod(&dex_file, method_idx));
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileDexMethod(&dex_compilation_unit, invoke_type);
   return result;
 }
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file) {
   art::ClassLinker *class_linker = art::Runtime::Current()->GetClassLinker();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 9438890..b2c3c2d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -50,8 +50,7 @@
 };
 
 
-CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver,
-                                               const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
                                                uint32_t access_flags,
                                                InvokeType invoke_type,
                                                uint16_t class_def_idx,
@@ -60,7 +59,8 @@
                                                const DexFile& dex_file) const {
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags,
+    GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx));
 
   // For testing purposes, we put a special marker on method names that should be compiled
   // with this compiler. This makes sure we're not regressing.
@@ -77,7 +77,7 @@
     return nullptr;
   }
 
-  InstructionSet instruction_set = driver.GetInstructionSet();
+  InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
   // The optimizing compiler currently does not have a Thumb2 assembler.
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
@@ -104,7 +104,7 @@
   graph->BuildDominatorTree();
   graph->TransformToSSA();
 
-  return new CompiledMethod(driver,
+  return new CompiledMethod(GetCompilerDriver(),
                             instruction_set,
                             allocator.GetMemory(),
                             codegen->GetFrameSize(),
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 18a5bce..032eabc 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "mem_map.h"
+#include "utils.h"
 
 namespace art {
 
@@ -155,7 +156,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = (bytes + 3) & ~3;
+    bytes = RoundUp(bytes, 4);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index bd78eae..b8b0e6e 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + kValgrindRedZoneBytes + 3) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index 28e86ec..d5b003c 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = (bytes + 3) & ~3;
+    size_t rounded_bytes = RoundUp(bytes, 4);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index eddaa0b..f81e2f9 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -50,7 +50,7 @@
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << *out_sp;
+  VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
   }
@@ -74,7 +74,7 @@
 
   // Need to work out the size of the instruction that caused the exception.
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+  VLOG(signals) << "pc: " << std::hex << static_cast<void*>(ptr);
   uint32_t instr_size = GetInstructionSize(ptr);
 
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
@@ -95,7 +95,7 @@
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  LOG(DEBUG) << "Generating null pointer exception";
+  VLOG(signals) << "Generating null pointer exception";
   return true;
 }
 
@@ -117,10 +117,10 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
-  LOG(DEBUG) << "checking suspend";
+  VLOG(signals) << "checking suspend";
 
   uint16_t inst2 = ptr2[0] | ptr2[1] << 8;
-  LOG(DEBUG) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
   if (inst2 != checkinst2) {
     // Second instruction is not good, not ours.
     return false;
@@ -132,7 +132,7 @@
   bool found = false;
   while (ptr1 > limit) {
     uint32_t inst1 = ((ptr1[0] | ptr1[1] << 8) << 16) | (ptr1[2] | ptr1[3] << 8);
-    LOG(DEBUG) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
     if (inst1 == checkinst1) {
       found = true;
       break;
@@ -140,7 +140,7 @@
     ptr1 -= 2;      // Min instruction size is 2 bytes.
   }
   if (found) {
-    LOG(DEBUG) << "suspend check match";
+    VLOG(signals) << "suspend check match";
     // This is a suspend check.  Arrange for the signal handler to return to
     // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
     // will resume the instruction (current PC + 2).  PC points to the
@@ -148,14 +148,14 @@
 
     // NB: remember that we need to set the bottom bit of the LR register
     // to switch to thumb mode.
-    LOG(DEBUG) << "arm lr: " << std::hex << sc->arm_lr;
-    LOG(DEBUG) << "arm pc: " << std::hex << sc->arm_pc;
+    VLOG(signals) << "arm lr: " << std::hex << sc->arm_lr;
+    VLOG(signals) << "arm pc: " << std::hex << sc->arm_pc;
     sc->arm_lr = sc->arm_pc + 3;      // +2 + 1 (for thumb)
     sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
-    LOG(DEBUG) << "removed suspend trigger invoking test suspend";
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
     return true;
   }
   return false;
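
The scan above can be read in isolation: starting one 32-bit slot before the faulting PC, it walks backwards in two-byte steps (the minimum Thumb instruction size) until it either matches the expected first instruction word or falls off the search window. A self-contained sketch of the same walk, with an assumed 40-byte window standing in for the limit computed above:

    #include <cstdint>

    // Sketch of the backwards scan performed by the suspend-check matcher.
    static bool FindFirstInstruction(const uint8_t* pc, uint32_t checkinst1) {
      const uint8_t* limit = pc - 40;   // assumed search window
      const uint8_t* p = pc - 4;        // candidate 32-bit instruction
      while (p > limit) {
        uint32_t inst1 =
            (static_cast<uint32_t>(p[0] | (p[1] << 8)) << 16) |
            static_cast<uint32_t>(p[2] | (p[3] << 8));
        if (inst1 == checkinst1) {
          return true;                  // found the suspend-check load
        }
        p -= 2;                         // min instruction size is 2 bytes
      }
      return false;
    }
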
@@ -174,103 +174,60 @@
 // on the stack.
 //
 // If we determine this is a stack overflow we need to move the stack pointer
-// to the overflow region below the protected region.  Because we now have
-// a gap in the stack (skips over protected region), we need to arrange
-// for the rest of the system to be unaware of the new stack arrangement
-// and behave as if there is a fully valid stack.  We do this by placing
-// a unique address onto the stack followed by
-// the size of the gap.  The stack walker will detect this and skip over the
-// gap.
-
-// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
-// stack must be 8 byte aligned when making any calls.
-
-// NB. The size of the gap is the difference between the previous frame's SP and
-// the SP at which the size word is pushed.
+// to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
-  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+  uintptr_t sp = sc->arm_sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
 
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
-  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
 
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
-    LOG(DEBUG) << "Not a stack overflow";
+    VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  R9 contains the current Thread* so
-  // we can read the stack_end from that and subtract the size of the
-  // protected region.  This creates a gap in the stack that needs to be marked.
-  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  uint8_t* prevsp = sp;
-  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
-  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
-
-  // We need to find the previous frame.  Remember that
-  // this has not yet been fully constructed because the SP has not been
-  // decremented.  So we need to work out the size of the spill portion of the
-  // frame.  This consists of something like:
-  //
-  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
-  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
-  //
-  // The first is encoded in the ArtMethod as the spill_mask, the second as the
-  // fp_spill_mask.  A population count on each will give the number of registers
-  // in each mask.  Each register is 4 bytes on ARM32.
-
-  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
-  uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = POPCOUNT(spill_mask);
-  uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = POPCOUNT(fp_spill_mask);
-  uint32_t spill_size = (numcores + numfps) * 4;
-  LOG(DEBUG) << "spill size: " << spill_size;
-  uint8_t* prevframe = prevsp + spill_size;
-  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
-
-  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
-  // is 4 bytes so that, together with the offset to the previous frame is 8
-  // bytes.  On other architectures we will need to align the stack.
-
-  // Push a marker onto the stack to tell the stack walker that there is a stack
-  // overflow and the stack is not contiguous.
-
-  // First the offset from SP to the previous frame.
-  sp -= sizeof(uint32_t);
-  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
-  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
-
-  // Now the gap marker (pointer sized).
-  sp -= sizeof(mirror::ArtMethod*);
-  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
 
   // Now establish the stack pointer for the signal return.
-  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+  sc->arm_sp = prevsp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  // We need the LR to point to the GC map just after the fault instruction.
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->arm_ip = sp;      // aka r12
 
-  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
-  // stack pointer to be in the overflow region.  Throwing the exception will perform
-  // a longjmp which will restore the stack pointer to the correct location for the
-  // exception catch.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->arm_pc.
   return true;
 }
 }       // namespace art
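
The detection step reduces to a single comparison: the implicit check faults by touching SP minus the reserved headroom, so only a fault at exactly that address is treated as an overflow. A compact sketch of the predicate (the 16 KB constant is an assumption; the real value is Thread::kStackOverflowReservedBytes):

    #include <cstdint>

    // Hedged sketch of the overflow test in StackOverflowHandler::Action.
    static bool IsImplicitStackOverflow(uintptr_t sp, uintptr_t fault_addr) {
      const uintptr_t kReservedBytes = 16 * 1024;  // assumed reserve size
      return fault_addr == sp - kReservedBytes;
    }
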
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bc80644..dcf4561 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,6 +235,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Invoke stack overflow exception from signal handler.
+   * On entry:
+   * r9: thread
+   * sp: address of last known frame
+   * r12: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in r12.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r0, r9                      @ pass Thread::Current
+    mov r1, sp                      @ pass SP
+    mov sp, r12                     @ move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
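
Expressed in C++, the handler/stub contract amounts to the following; SwitchToStack is a hypothetical stand-in for the `mov sp, r12`, since the real transfer must happen in assembly before any C++ frame can be built:

    // r9 = self, sp = last known frame, r12 = first usable SP below the gap.
    extern "C" void artThrowStackOverflowFromCode(void* self, void* sp);
    void SwitchToStack(void* new_sp);  // hypothetical; really `mov sp, r12`

    void ThrowOverflowFromSignalSketch(void* self, void* last_frame,
                                       void* usable_sp) {
      SwitchToStack(usable_sp);                         // hop over the bad pages
      artThrowStackOverflowFromCode(self, last_frame);  // never returns
    }
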
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7b66613..8079460 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -419,14 +419,30 @@
     brk 0  // Unreached
 .endm
 
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
-    ldr x9, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
-    cbnz x9, 1f
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
+    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+    cbnz \reg, 1f
     ret
 1:
     DELIVER_PENDING_EXCEPTION
 .endm
 
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9
+.endm
+
+// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
+.endm
+
+.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
+    cbnz w0, 1f                // result non-zero branch over
+    ret                        // return
+1:
+    DELIVER_PENDING_EXCEPTION
+.endm
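
In C++ terms the macros implement one epilogue: consult the thread-local exception slot (or the call's w0 result) and either return or fall into the deliver path. A rough analogue, with stand-in types:

    // Rough C++ analogue of RETURN_OR_DELIVER_PENDING_EXCEPTION_REG; the reg
    // parameter only selects which scratch register holds the loaded slot.
    struct ThreadSketch { void* exception; };     // stand-in for the real layout
    [[noreturn]] void DeliverPendingException();  // hypothetical

    void ReturnOrDeliverSketch(ThreadSketch* self) {
      if (self->exception != nullptr) {
        DeliverPendingException();  // unwinds into exception delivery
      }
      // otherwise: plain `ret` in the assembly above
    }
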
+
 // FIXME: Temporary fix for TR(XSELF).
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
@@ -1153,19 +1169,6 @@
 UNIMPLEMENTED art_quick_initialize_static_storage
 UNIMPLEMENTED art_quick_initialize_type
 UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-UNIMPLEMENTED art_quick_get32_static
-UNIMPLEMENTED art_quick_get64_static
-UNIMPLEMENTED art_quick_get_obj_static
-UNIMPLEMENTED art_quick_get32_instance
-UNIMPLEMENTED art_quick_get64_instance
-UNIMPLEMENTED art_quick_get_obj_instance
-UNIMPLEMENTED art_quick_set32_static
-UNIMPLEMENTED art_quick_set64_static
-UNIMPLEMENTED art_quick_set_obj_static
-UNIMPLEMENTED art_quick_set32_instance
-UNIMPLEMENTED art_quick_set64_instance
-UNIMPLEMENTED art_quick_set_obj_instance
-UNIMPLEMENTED art_quick_resolve_string
 
 // Macro to facilitate adding new allocation entrypoints.
 // TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
@@ -1197,6 +1200,82 @@
 END \name
 .endm
 
+// Macros taking opportunity of code similarities for downcalls with referrer.
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, xSELF                  // pass Thread::Current
+    mov    x3, sp                     // pass SP
+    bl     \entrypoint                // (uint32_t field_idx, ArtMethod* referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x4, xSELF                  // pass Thread::Current
+    mov    x5, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+// This is separated out as the argument order is different.
+    .extern artSet64StaticFromCode
+ENTRY art_quick_set64_static
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x3, x1                     // Store value
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, x3                     // Put value param
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     artSet64StaticFromCode
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_set64_static
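
The shuffle exists because the C++ entrypoint places the referrer between the field index and the 64-bit value, so the value is parked in a scratch register while x1 is reloaded from the frame. The signature implied by the register assignments above, reconstructed here as an assumption rather than the authoritative declaration (w0 == 0 means success, matching RETURN_IF_W0_IS_ZERO_OR_DELIVER):

    #include <cstdint>

    // Reconstructed from the register moves; treat as an assumption.
    extern "C" int artSet64StaticFromCode(uint32_t field_idx,  // x0
                                          void* referrer,      // x1, reloaded from the frame
                                          uint64_t new_val,    // x2, parked in x3 first
                                          void* self,          // x3
                                          void* sp);           // x4
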
+
+
+UNIMPLEMENTED art_quick_resolve_string
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 94a7598..4438f25 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "common_runtime_test.h"
+#include "mirror/art_field-inl.h"
 #include "mirror/string-inl.h"
 
 #include <cstdio>
@@ -73,17 +74,28 @@
     __asm__ __volatile__(
         "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
         ".cfi_adjust_cfa_offset 52\n\t"
-        "sub sp, sp, #8\n\t"        // +8B, so 16B aligned with nullptr
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "mov r0, %[arg0]\n\t"       // Set arg0-arg2
-        "mov r1, %[arg1]\n\t"       // TODO: Any way to use constraints like on x86?
-        "mov r2, %[arg2]\n\t"
-        // Use r9 last as we don't know whether it was used for arg0-arg2
-        "mov r9, #0\n\t"            // Push nullptr to terminate stack
         "push {r9}\n\t"
         ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, %[self]\n\t"       // Set the thread
-        "blx %[code]\n\t"           // Call the stub
+        "mov r9, #0\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push nullptr to terminate stack, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
         "add sp, sp, #12\n\t"       // Pop nullptr and padding
         ".cfi_adjust_cfa_offset -12\n\t"
         "pop {r1-r12, lr}\n\t"      // Restore state
@@ -91,30 +103,42 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1, [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x18, [sp, #16]\n\t"   // Save x2, x18(xSELF)
-        "str x30, [sp, #32]\n\t"       // Save xLR
-        "mov x0, %[arg0]\n\t"          // Set arg0-arg2
-        "mov x1, %[arg1]\n\t"          // TODO: Any way to use constraints like on x86?
-        "mov x2, %[arg2]\n\t"
-        // Use r18 last as we don't know whether it was used for arg0-arg2
-        "mov x18, %[self]\n\t"         // Set the thread
-        "blr %[code]\n\t"              // Call the stub
+        "stp xzr, x1,  [sp]\n\t"        // nullptr(end of quick stack), x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
         "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x18, x30, [sp, #24]\n\t"  // Restore xSELF, xLR
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
         "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
+
         "mov %[result], x0\n\t"        // Save the result
         : [result] "=r" (result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -139,6 +163,151 @@
     self->PopManagedStackFragment(fragment);
     return result;
   }
+
+ public:
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
+                             mirror::ArtMethod* referrer) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+#if defined(__i386__)
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushl %[referrer]\n\t"     // Store referrer
+        "call *%%edi\n\t"           // Call the stub
+        "addl $4, %%esp"            // Pop referrer
+        : "=a" (result)
+          // Use the result from eax
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
+    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
+    //       but compilation fails when declaring that.
+#elif defined(__arm__)
+    __asm__ __volatile__(
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "push {r9}\n\t"
+        ".cfi_adjust_cfa_offset 4\n\t"
+        "mov r9, %[referrer]\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
+        "mov %[result], r0\n\t"     // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : );  // clobber.
+#elif defined(__aarch64__)
+    __asm__ __volatile__(
+        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+        "mov %[result], x0\n\t"        // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+#elif defined(__x86_64__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"             // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+          // Use the result from rax
+          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+    return result;
+  }
+
+  // Method with 32b arg0, 64b arg1
+  size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
+                              mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
+#else
+    // Need to split up arguments.
+    uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF);
+    uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF);
+
+    return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer);
+#endif
+  }
+
+  // Method with 32b arg0, 32b arg1, 64b arg2
+  size_t Invoke3UUWithReferrer(uint32_t arg0, uint32_t arg1, uint64_t arg2, uintptr_t code,
+                               Thread* self, mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, referrer);
+#else
+    // TODO: Needs 4-param invoke.
+    return 0;
+#endif
+  }
 };
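
A worked example of the 32-bit split in Invoke3UWithReferrer: the 64-bit value is handed to the stub as two 32-bit halves, low word first.

    #include <cstdint>

    // 64-bit value handed to a 32-bit stub as two halves, low word first.
    constexpr uint64_t v = UINT64_C(0x1122334455667788);
    constexpr uint32_t lower = static_cast<uint32_t>(v & 0xFFFFFFFF);          // 0x55667788
    constexpr uint32_t upper = static_cast<uint32_t>((v >> 32) & 0xFFFFFFFF);  // 0x11223344
    static_assert(lower == 0x55667788 && upper == 0x11223344, "split check");
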
 
 
@@ -231,6 +400,7 @@
 #endif
 }
 
+
 class RandGen {
  public:
   explicit RandGen(uint32_t seed) : val_(seed) {}
@@ -723,11 +893,11 @@
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-/*
- * For some reason this does not work, as the type_idx is artificial and outside what the
- * resolved types of c_obj allow...
- *
-  {
+
+  // For some reason this does not work, as the type_idx is artificial and outside what the
+  // resolved types of c_obj allow...
+
+  if (false) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
@@ -742,7 +912,7 @@
     VerifyObject(obj);
     EXPECT_EQ(obj->GetLength(), 10);
   }
-*/
+
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
@@ -750,7 +920,7 @@
                             reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
                             self);
 
-    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
     EXPECT_TRUE(obj->IsArrayInstance());
@@ -881,4 +1051,383 @@
 #endif
 }
 
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_static(void);
+extern "C" void art_quick_get32_static(void);
+#endif
+
+static void GetSet32Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              static_cast<size_t>(values[i]),
+                              0U,
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_static),
+                              self,
+                              referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get32_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set32static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_instance(void);
+extern "C" void art_quick_get32_instance(void);
+#endif
+
+static void GetSet32Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_instance),
+                              self,
+                              referrer);
+
+    int32_t res = f->get()->GetInt(obj->get());
+    EXPECT_EQ(res, static_cast<int32_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetInt<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get32_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int32_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_static(void);
+extern "C" void art_quick_get_obj_static(void);
+
+static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
+                                 mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                            reinterpret_cast<size_t>(val),
+                            0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_static),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                                         0U, 0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_static),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+}
+#endif
+
+static void GetSetObjStatic(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                            mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_static((*f)->GetDexFieldIndex(), str, self, referrer, test);
+
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_instance(void);
+extern "C" void art_quick_get_obj_instance(void);
+
+static void set_and_check_instance(SirtRef<mirror::ArtField>* f, mirror::Object* trg,
+                                   mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
+                                   StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                            reinterpret_cast<size_t>(trg),
+                            reinterpret_cast<size_t>(val),
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_instance),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                         reinterpret_cast<size_t>(trg),
+                                         0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_instance),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+
+  EXPECT_EQ(val, f->get()->GetObj(trg));
+}
+#endif
+
+static void GetSetObjInstance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_instance(f, obj->get(), str, self, referrer, test);
+
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+// TODO: Complete these tests for 32b architectures.
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_static(void);
+extern "C" void art_quick_get64_static(void);
+#endif
+
+static void GetSet64Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                               values[i],
+                               reinterpret_cast<uintptr_t>(&art_quick_set64_static),
+                               self,
+                               referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get64_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set64static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_instance(void);
+extern "C" void art_quick_get64_instance(void);
+#endif
+
+static void GetSet64Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set64_instance),
+                              self,
+                              referrer);
+
+    int64_t res = f->get()->GetLong(obj->get());
+    EXPECT_EQ(res, static_cast<int64_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetLong<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get64_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int64_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+static void TestFields(Thread* self, StubTest* test, Primitive::Type test_type) {
+  // garbage is created during ClassLinker::Init
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  jclass jc = env->FindClass("AllFields");
+  CHECK(jc != nullptr);
+  jobject o = env->AllocObject(jc);
+  CHECK(o != nullptr);
+
+  ScopedObjectAccess soa(self);
+  SirtRef<mirror::Object> obj(self, soa.Decode<mirror::Object*>(o));
+
+  SirtRef<mirror::Class> c(self, obj->GetClass());
+
+  // Need a method as a referrer
+  SirtRef<mirror::ArtMethod> m(self, c->GetDirectMethod(0));
+
+  // Play with it...
+
+  // Static fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetSFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjStatic(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // Instance fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetIFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjInstance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+}
+
+
+TEST_F(StubTest, Fields32) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimInt);
+}
+
+TEST_F(StubTest, FieldsObj) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimNot);
+}
+
+TEST_F(StubTest, Fields64) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimLong);
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 7b56718..a55dbb6 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -871,82 +871,63 @@
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
 
-DEFINE_FUNCTION art_quick_set32_instance
+
+MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rsi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
+    movq %rsp, %rcx                    // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rdx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
     movq 8(%rsp), %rcx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
+                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
     movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_instance
+    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
 
-DEFINE_FUNCTION art_quick_set64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set64_instance
 
-DEFINE_FUNCTION art_quick_set_obj_instance
-    movq 8(%rsp), %rcx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
-    movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSetObjInstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set_obj_instance
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get32_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet32InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get32_instance
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_get64_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get64_instance
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get_obj_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGetObjInstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get_obj_instance
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_set32_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet32StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_static
-
+// This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
     movq %rsi, %rdx                    // pass new_val
     movq 8(%rsp), %rsi                 // pass referrer
@@ -959,49 +940,6 @@
     RETURN_IF_EAX_ZERO                 // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
-DEFINE_FUNCTION art_quick_set_obj_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSetObjStaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_set_obj_static
-
-DEFINE_FUNCTION art_quick_get32_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet32StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get32_static
-
-DEFINE_FUNCTION art_quick_get64_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet64StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get64_static
-
-DEFINE_FUNCTION art_quick_get_obj_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGetObjStaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get_obj_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
     // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index bd5ae85..c4461fa 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -296,6 +296,7 @@
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
+  bool signals;
 };
 
 extern LogVerbosity gLogVerbosity;
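
With the new flag in place, VLOG(signals) compiles to a guarded stream, so the conversions in fault_handler_arm.cc above cost nothing unless -verbose:signals is given. Schematically (the exact macro lives nearby in logging.h; this shows the usual guarded-stream pattern, not a quote of it):

    #include <iostream>

    struct LogVerbositySketch { bool signals = false; };  // mirrors the struct above
    LogVerbositySketch gVerbositySketch;

    // The dangling-else trick keeps the macro safe inside unbraced if/else.
    #define VLOG_SIGNALS_SKETCH() \
      if (!gVerbositySketch.signals) {} else std::cerr

    // Usage: VLOG_SIGNALS_SKETCH() << "sp: " << std::hex << sp << "\n";
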
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dbea0d8..e3c162b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3309,33 +3309,36 @@
   if (klass->IsInterface()) {
     return true;
   }
-  Thread* self = Thread::Current();
-  // begin with the methods local to the superclass
+  // Begin with the methods local to the superclass.
+  MethodHelper mh;
+  MethodHelper super_mh;
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
-    SirtRef<mirror::Class> super(self, klass->GetSuperClass());
-    for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
-      if (method != super->GetVTable()->Get(i) &&
-          !IsSameMethodSignatureInDifferentClassContexts(self, method, super.get(), klass.get())) {
+    for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
+      mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
+      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
+      bool is_override = mh.GetMethod() != super_mh.GetMethod();
+      if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
         ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
-                          PrettyDescriptor(klass.get()).c_str(), PrettyMethod(method).c_str(),
-                          PrettyDescriptor(super.get()).c_str());
+                          PrettyDescriptor(klass.get()).c_str(),
+                          PrettyMethod(mh.GetMethod()).c_str(),
+                          PrettyDescriptor(klass->GetSuperClass()).c_str());
         return false;
       }
     }
   }
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
-    if (klass->GetClassLoader() != interface->GetClassLoader()) {
-      for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
-        if (!IsSameMethodSignatureInDifferentClassContexts(self, method, interface.get(),
-                                                           method->GetDeclaringClass())) {
+    if (klass->GetClassLoader() != klass->GetIfTable()->GetInterface(i)->GetClassLoader()) {
+      uint32_t num_methods = klass->GetIfTable()->GetInterface(i)->NumVirtualMethods();
+      for (uint32_t j = 0; j < num_methods; ++j) {
+        mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
+        super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
+        bool is_override = mh.GetMethod() != super_mh.GetMethod();
+        if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
           ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
-                            PrettyDescriptor(method->GetDeclaringClass()).c_str(),
-                            PrettyMethod(method).c_str(),
-                            PrettyDescriptor(interface.get()).c_str());
+                            PrettyDescriptor(klass.get()).c_str(),
+                            PrettyMethod(mh.GetMethod()).c_str(),
+                            PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
           return false;
         }
       }
@@ -3344,60 +3347,6 @@
   return true;
 }
 
-// Returns true if classes referenced by the signature of the method are the
-// same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(Thread* self,
-                                                                mirror::ArtMethod* method,
-                                                                mirror::Class* klass1,
-                                                                mirror::Class* klass2) {
-  if (klass1 == klass2) {
-    return true;
-  }
-  CHECK(klass1 != nullptr);
-  CHECK(klass2 != nullptr);
-  SirtRef<mirror::ClassLoader> loader1(self, klass1->GetClassLoader());
-  SirtRef<mirror::ClassLoader> loader2(self, klass2->GetClassLoader());
-  const DexFile& dex_file = *method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-  const DexFile::ProtoId& proto_id =
-      dex_file.GetMethodPrototype(dex_file.GetMethodId(method->GetDexMethodIndex()));
-  for (DexFileParameterIterator it(dex_file, proto_id); it.HasNext(); it.Next()) {
-    const char* descriptor = it.GetDescriptor();
-    if (descriptor == nullptr) {
-      break;
-    }
-    if (descriptor[0] == 'L' || descriptor[0] == '[') {
-      // Found a non-primitive type.
-      if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-        return false;
-      }
-    }
-  }
-  // Check the return type
-  const char* descriptor = dex_file.GetReturnTypeDescriptor(proto_id);
-  if (descriptor[0] == 'L' || descriptor[0] == '[') {
-    if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Returns true if the descriptor resolves to the same class in the context of loader1 and loader2.
-bool ClassLinker::IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                           SirtRef<mirror::ClassLoader>& loader1,
-                                                           SirtRef<mirror::ClassLoader>& loader2) {
-  CHECK(descriptor != nullptr);
-  SirtRef<mirror::Class> found1(self, FindClass(self, descriptor, loader1));
-  if (found1.get() == nullptr) {
-    self->ClearException();
-  }
-  mirror::Class* found2 = FindClass(self, descriptor, loader2);
-  if (found2 == nullptr) {
-    self->ClearException();
-  }
-  return found1.get() == found2;
-}
-
 bool ClassLinker::EnsureInitialized(const SirtRef<mirror::Class>& c, bool can_init_fields,
                                     bool can_init_parents) {
   DCHECK(c.get() != NULL);
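
The class_linker.cc rework replaces the two descriptor-based helpers removed above: instead of re-resolving every descriptor through FindClass in both class loaders (and clearing the exception after each miss), each MethodHelper resolves the parameter and return types through its own method's dex cache and defining loader, and HasSameSignatureWithDifferentClassLoaders (added in runtime/object_utils.h below) compares the resolved mirror::Class pointers directly. Since a runtime class is identified by the pair (defining loader, descriptor), pointer equality is exactly the "same class in both contexts" condition the old code computed by hand.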
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index b8093bc..283faa2 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -35,6 +35,13 @@
 // Static fault manager object accessed by signal handler.
 FaultManager fault_manager;
 
+extern "C" {
+void art_sigsegv_fault() {
+  // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
+  LOG(ERROR) << "Caught unknown SIGSEGV in ART fault handler";
+}
+}
+
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
   fault_manager.HandleFault(sig, info, context);
@@ -75,7 +82,10 @@
       return;
     }
   }
-  LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
+
+  // Allow the user to catch this problem with a simple breakpoint in art_sigsegv_fault.
+  art_sigsegv_fault();
+
   oldaction_.sa_sigaction(sig, info, context);
 }
 
@@ -106,23 +116,23 @@
 bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
-  LOG(DEBUG) << "Checking for generated code";
+  VLOG(signals) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
-    LOG(DEBUG) << "no current thread";
+    VLOG(signals) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
-    LOG(DEBUG) << "not runnable";
+    VLOG(signals) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
-    LOG(DEBUG) << "no lock";
+    VLOG(signals) << "no lock";
     return false;
   }
 
@@ -135,9 +145,9 @@
   GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
-  LOG(DEBUG) << "potential method: " << method_obj;
+  VLOG(signals) << "potential method: " << method_obj;
   if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
-    LOG(DEBUG) << "no method";
+    VLOG(signals) << "no method";
     return false;
   }
 
@@ -147,36 +157,36 @@
   // TODO: Method might be not a heap address, and GetClass could fault.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
-    LOG(DEBUG) << "not a class";
+    VLOG(signals) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
-    LOG(DEBUG) << "not aligned";
+    VLOG(signals) << "not aligned";
     return false;
   }
 
 
   if (!VerifyClassClass(cls)) {
-    LOG(DEBUG) << "not a class class";
+    VLOG(signals) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
-    LOG(DEBUG) << "not a method";
+    VLOG(signals) << "not a method";
     return false;
   }
 
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
-    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
     const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
     uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
-    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
   uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
-  LOG(DEBUG) << "dexpc: " << dexpc;
+  VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
 
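art_sigsegv_fault exists purely as a breakpoint target: an out-of-line extern "C" function, called on the fallback path, gives debuggers a stable unmangled symbol to stop on before the handler chains to the previously installed sigaction. A sketch of the same pattern with hypothetical names (installation via sigaction(SIGSEGV, ...) is elided, and fprintf is not async-signal-safe; this is for illustration only):

#include <csignal>
#include <cstdio>

// Hypothetical hook: extern "C" keeps the symbol unmangled, and the visible
// side effect keeps the call from being optimized away.
extern "C" void example_sigsegv_hook() {
  std::fprintf(stderr, "unhandled SIGSEGV reached the fallback path\n");
}

static struct sigaction old_action;  // saved when the handler was installed

static void example_handler(int sig, siginfo_t* info, void* context) {
  // ... give registered fault handlers first refusal here ...
  example_sigsegv_hook();                       // "break example_sigsegv_hook"
  old_action.sa_sigaction(sig, info, context);  // chain to the old handler
}

With the real symbol, "break art_sigsegv_fault" in gdb stops exactly when a SIGSEGV is about to leave ART's fault handler unhandled.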
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4484494..5d38b02 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -367,7 +367,7 @@
     dlmalloc_space_ = space::DlMallocSpace::CreateFromMemMap(
         mem_map, "main dlmalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
         can_move_objects);
-    main_space_ = rosalloc_space_;
+    main_space_ = dlmalloc_space_;
     CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
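The one-line heap.cc change fixes a copy-paste bug: on the dlmalloc path, main_space_ was assigned rosalloc_space_ (presumably still null here), so the CHECK on the next line, whose message already names the dlmalloc space, could fire even though dlmalloc_space_ had been created successfully.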
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7232e54..a87f95c 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -28,7 +28,17 @@
                                 Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string name(PrettyMethod(method));
-  if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
+  if (name == "java.lang.Object dalvik.system.VMRuntime.newUnpaddedArray(java.lang.Class, int)") {
+    int32_t length = args[1];
+    DCHECK_GE(length, 0);
+    mirror::Class* element_class = reinterpret_cast<Object*>(args[0])->AsClass();
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, element_class);
+    DCHECK(array_class != nullptr);
+    gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+    result->SetL(mirror::Array::Alloc<true>(self, array_class, length,
+                                            array_class->GetComponentSize(), allocator, true));
+  } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
     result->SetL(NULL);
   } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") {
     NthCallerVisitor visitor(self, 3);
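The interpreter's unstarted-runtime shim gains an intercept for VMRuntime.newUnpaddedArray, so code interpreted before the runtime starts (for example during compile-time class initialization) can allocate arrays: the element class arrives in args[0], the length in args[1], the array class is looked up via the class linker, and the request is served by Array::Alloc through the current allocator. The trailing true presumably requests the fill-the-usable-size behavior that makes the array "unpadded".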
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 74b7c42..e425e91 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -519,7 +519,7 @@
       // Don't allow finalizable objects to be allocated during a transaction since these can't be
       // finalized without a started runtime.
       if (transaction_active && obj->GetClass()->IsFinalizable()) {
-        AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+        AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                          PrettyTypeOf(obj).c_str());
         HANDLE_PENDING_EXCEPTION();
       }
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0da1445..9c13973 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -432,7 +432,7 @@
           // Don't allow finalizable objects to be allocated during a transaction since these can't
           // be finalized without a started runtime.
           if (transaction_active && obj->GetClass()->IsFinalizable()) {
-            AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+            AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                              PrettyTypeOf(obj).c_str());
             HANDLE_PENDING_EXCEPTION();
             break;
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 072f074..504537a 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -520,8 +520,7 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
-  bool HasSameNameAndSignature(MethodHelper* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
     if (GetDexCache() == other->GetDexCache()) {
@@ -539,6 +538,33 @@
     return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
   }
 
+  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
+      return false;
+    }
+    const DexFile::TypeList* types = GetParameterTypeList();
+    const DexFile::TypeList* other_types = other->GetParameterTypeList();
+    if (types == nullptr) {
+      return (other_types == nullptr) || (other_types->Size() == 0);
+    } else if (UNLIKELY(other_types == nullptr)) {
+      return types->Size() == 0;
+    }
+    uint32_t num_types = types->Size();
+    if (UNLIKELY(num_types != other_types->Size())) {
+      return false;
+    }
+    for (uint32_t i = 0; i < num_types; ++i) {
+      mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+      mirror::Class* other_param_type =
+          other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
+      if (UNLIKELY(param_type != other_param_type)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   const DexFile::CodeItem* GetCodeItem()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
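Note the symmetric null handling in HasSameSignatureWithDifferentClassLoaders: a method with no parameters may carry either no TypeList at all (nullptr) or an empty one, so each side must accept the other's nullptr and Size() == 0 as equivalent before the per-parameter comparison of resolved classes.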
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9cf8785..1562527 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -232,6 +232,7 @@
 //  gLogVerbosity.startup = true;  // TODO: don't check this in!
 //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.threads = true;  // TODO: don't check this in!
+//  gLogVerbosity.signals = true;  // TODO: don't check this in!
 
   method_trace_ = false;
   method_trace_file_ = "/data/method-trace-file.bin";
@@ -253,7 +254,7 @@
 #ifdef HAVE_ANDROID_OS
   {
     char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.implicit_checks", buf, "none");
+    property_get("dalvik.vm.implicit_checks", buf, "null,stack");
     std::string checks(buf);
     std::vector<std::string> checkvec;
     Split(checks, ',', checkvec);
@@ -464,6 +465,8 @@
           gLogVerbosity.third_party_jni = true;
         } else if (verbose_options[i] == "threads") {
           gLogVerbosity.threads = true;
+        } else if (verbose_options[i] == "signals") {
+          gLogVerbosity.signals = true;
         } else {
           Usage("Unknown -verbose option %s\n", verbose_options[i].c_str());
           return false;
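Two user-visible knobs change here: -verbose:signals can now be enabled from the command line (mirroring the commented-out developer toggle above), and on Android the dalvik.vm.implicit_checks property now defaults to "null,stack", i.e. implicit null checks and implicit stack-overflow checks are on unless the property says otherwise.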
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 027feee..cbd51d4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -535,9 +535,20 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
-  if (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
+  bool implicit_checks_supported = false;
+  switch (kRuntimeISA) {
+  case kArm:
+  case kThumb2:
+    implicit_checks_supported = true;
+    break;
+  default:
+    break;
+  }
+
+  if (implicit_checks_supported &&
+    (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
         ParsedOptions::kExplicitNullCheck |
-        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler) {
+        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
     fault_manager.Init();
 
     // These need to be in a specific order.  The null pointer check handler must be
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9c709ae..5e64e59 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -32,14 +32,6 @@
 
 namespace art {
 
-// Define a piece of memory, the address of which can be used as a marker
-// for the gap in the stack added during stack overflow handling.
-static uint32_t stack_overflow_object;
-
-// The stack overflow gap marker is simply a valid unique address.
-void* stack_overflow_gap_marker = &stack_overflow_object;
-
-
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -305,56 +297,23 @@
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
 
-  bool kDebugStackWalk = false;
-  bool kDebugStackWalkVeryVerbose = false;            // The name says it all.
-
-  if (kDebugStackWalk) {
-    LOG(INFO) << "walking stack";
-  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
-    if (kDebugStackWalkVeryVerbose) {
-      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
-      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
-    }
 
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        // Check for a stack overflow gap marker.
-        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
-          // Marker for a stack overflow.  This is followed by the offset from the
-          // current SP to the next frame.  There is a gap in the stack here.  Jump
-          // the gap silently.
-          // Caveat coder: the layout of the overflow marker depends on the architecture.
-          //   The first element is address sized (8 bytes on a 64 bit machine).  The second
-          //   element is 32 bits.  So be careful with those address calculations.
-
-          // Get the address of the offset, just beyond the marker pointer.
-          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
-          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
-          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
-          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
-            reinterpret_cast<byte*>(gapsizeaddr) + gap);
-          if (kDebugStackWalk) {
-            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
-                next_frame;
-          }
-          cur_quick_frame_ = next_frame;
-          method = *next_frame;
-          CHECK(method != nullptr);
-        } else {
-          SanityCheckFrame();
-          bool should_continue = VisitFrame();
-          if (UNLIKELY(!should_continue)) {
-            return;
-          }
+        SanityCheckFrame();
+        bool should_continue = VisitFrame();
+        if (UNLIKELY(!should_continue)) {
+          return;
         }
+
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
         }
@@ -363,9 +322,6 @@
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
-        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -398,10 +354,6 @@
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
-          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
-        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
diff --git a/runtime/stack.h b/runtime/stack.h
index 73a823a..88ef78f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -102,14 +102,6 @@
   kVRegNonSpecialTempBaseReg = -3,
 };
 
-// Special object used to mark the gap in the stack placed when a stack
-// overflow fault occurs during implicit stack checking.  This is not
-// a real object - it is used simply as a valid address to which a
-// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
-// into the stack during the stack overflow signal handling to mark the gap
-// in which the memory is protected against read and write.
-extern void* stack_overflow_gap_marker;
-
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 23a6779..3a62cd5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -243,10 +243,16 @@
   pregion -= kStackOverflowProtectedSize;
 
   // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.
+  // need to do this on the main stack.  We only need to touch one byte per page.
   if (is_main_stack) {
-    memset(pregion, 0x55, kStackOverflowProtectedSize);
+    byte* start = pregion;
+    byte* end = pregion + kStackOverflowProtectedSize;
+    while (start < end) {
+      *start = static_cast<byte>(0);
+      start += kPageSize;
+    }
   }
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
@@ -255,6 +261,11 @@
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
+
+  // Tell the kernel that we won't be needing these pages any more.
+  if (is_main_stack) {
+    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+  }
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
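
thread.cc now touches one byte per page rather than memset-ing the whole protected region, and afterwards madvises the pages away, so the guard region costs no physical memory once mprotect has made it inaccessible. A standalone sketch of the technique, assuming a page-aligned region (names and error handling here are illustrative, not ART's):

#include <cstddef>
#include <sys/mman.h>
#include <unistd.h>

// Map the region in, make it fault on any access, then drop its pages.
bool ProtectGuardRegion(unsigned char* base, size_t size) {
  const size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  // Touch one byte per page so every page is mapped; otherwise mprotect
  // can fail on unmapped portions of a lazily grown main-thread stack.
  for (unsigned char* p = base; p < base + size; p += page_size) {
    *p = 0;
  }
  if (mprotect(base, size, PROT_NONE) != 0) {
    return false;  // caller decides whether this is fatal
  }
  // The pages can never be legitimately read again, so tell the kernel
  // it may reclaim the backing memory.
  madvise(base, size, MADV_DONTNEED);
  return true;
}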
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ee2cca4..c332bdf 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1169,10 +1169,12 @@
 
 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
   CHECK(subdir != nullptr);
-  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const char* android_data = GetAndroidData();
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
+    if (strcmp(android_data, "/data") != 0) {
       int result = mkdir(dalvik_cache_root.c_str(), 0700);
       if (result != 0 && errno != EEXIST) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 03ceed3..bf1de86 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3126,9 +3126,10 @@
     return nullptr;
   }
   mirror::ObjectArray<mirror::ArtMethod>* vtable = actual_arg_type.GetClass()->GetVTable();
-  CHECK(vtable != nullptr);
+  CHECK(vtable != nullptr) << PrettyDescriptor(actual_arg_type.GetClass());
   uint16_t vtable_index = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength());
+  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength())
+      << PrettyDescriptor(actual_arg_type.GetClass());
   mirror::ArtMethod* res_method = vtable->Get(vtable_index);
   CHECK(!Thread::Current()->IsExceptionPending());
   return res_method;
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index e0d2f1d..6cf7998 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -150,7 +150,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"
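The mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} step becomes unnecessary: the script exports ANDROID_DATA=$DEX_LOCATION, which is never /data, so under the GetDalvikCacheOrDie change above the runtime now creates the cache directory itself.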