Merge "Fix spammy "Disabling background compaction for non zygote" message."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 429c523..d4e2cbb 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -78,6 +78,7 @@
 	compiler/oat_test.cc \
 	compiler/optimizing/codegen_test.cc \
 	compiler/optimizing/dominator_test.cc \
+	compiler/optimizing/liveness_test.cc \
 	compiler/optimizing/pretty_printer_test.cc \
 	compiler/optimizing/ssa_test.cc \
 	compiler/output_stream_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e3201e7..1b70d59 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -27,6 +27,12 @@
 	dex/quick/arm/int_arm.cc \
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
+	dex/quick/arm64/assemble_arm64.cc \
+	dex/quick/arm64/call_arm64.cc \
+	dex/quick/arm64/fp_arm64.cc \
+	dex/quick/arm64/int_arm64.cc \
+	dex/quick/arm64/target_arm64.cc \
+	dex/quick/arm64/utility_arm64.cc \
 	dex/quick/codegen_util.cc \
 	dex/quick/dex_file_method_inliner.cc \
 	dex/quick/dex_file_to_method_inliner_map.cc \
@@ -79,6 +85,7 @@
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/ssa_builder.cc \
+	optimizing/ssa_liveness_analysis.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 59ed827..7441dac 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -138,7 +138,7 @@
   oatdata_offsets_to_compiled_code_offset_.push_back(offset);
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
@@ -148,48 +148,48 @@
                                const std::vector<uint8_t>& vmap_table,
                                const std::vector<uint8_t>& native_gc_map,
                                const std::vector<uint8_t>* cfi_info)
-    : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
+    : CompiledCode(driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-  mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
-  vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
-  gc_map_(driver.DeduplicateGCMap(native_gc_map)),
-  cfi_info_(driver.DeduplicateCFIInfo(cfi_info)) {
+  mapping_table_(driver->DeduplicateMappingTable(mapping_table)),
+  vmap_table_(driver->DeduplicateVMapTable(vmap_table)),
+  gc_map_(driver->DeduplicateGCMap(native_gc_map)),
+  cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) {
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask)
-    : CompiledCode(&driver, instruction_set, code),
+    : CompiledCode(driver, instruction_set, code),
       frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-      mapping_table_(driver.DeduplicateMappingTable(std::vector<uint8_t>())),
-      vmap_table_(driver.DeduplicateVMapTable(std::vector<uint8_t>())),
-      gc_map_(driver.DeduplicateGCMap(std::vector<uint8_t>())),
+      mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())),
+      vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())),
+      gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())),
       cfi_info_(nullptr) {
 }
 
 // Constructs a CompiledMethod for the Portable compiler.
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::vector<uint8_t>& gc_map,
                                const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
-      fp_spill_mask_(0), gc_map_(driver.DeduplicateGCMap(gc_map)) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
+      fp_spill_mask_(0), gc_map_(driver->DeduplicateGCMap(gc_map)) {
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
       fp_spill_mask_(0) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
-  gc_map_ = driver.DeduplicateGCMap(std::vector<uint8_t>());
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
+  gc_map_ = driver->DeduplicateGCMap(std::vector<uint8_t>());
 }
 
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 90ae6ee..844b53c 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -102,7 +102,7 @@
 class CompiledMethod : public CompiledCode {
  public:
   // Constructs a CompiledMethod for the non-LLVM compilers.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -114,7 +114,7 @@
                  const std::vector<uint8_t>* cfi_info);
 
   // Constructs a CompiledMethod for the QuickJniCompiler.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -122,11 +122,11 @@
                  const uint32_t fp_spill_mask);
 
   // Constructs a CompiledMethod for the Portable compiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::vector<uint8_t>& gc_map, const std::string& symbol);
 
   // Constructs a CompiledMethod for the Portable JniCompiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::string& symbol);
 
   ~CompiledMethod() {}
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index c88c38e..a832c31 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 #ifdef ART_SEA_IR_MODE
-extern "C" art::CompiledMethod* SeaIrCompileMethod(art::CompilerDriver& driver,
-                                                   const art::DexFile::CodeItem* code_item,
+extern "C" art::CompiledMethod* SeaIrCompileMethod(const art::DexFile::CodeItem* code_item,
                                                    uint32_t access_flags,
                                                    art::InvokeType invoke_type,
                                                    uint16_t class_def_idx,
@@ -38,8 +37,7 @@
 #endif
 
 
-CompiledMethod* Compiler::TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                              const art::DexFile::CodeItem* code_item,
+CompiledMethod* Compiler::TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                               uint32_t access_flags,
                                               art::InvokeType invoke_type,
                                               uint16_t class_def_idx,
@@ -47,13 +45,10 @@
                                               jobject class_loader,
                                               const art::DexFile& dex_file) {
 #ifdef ART_SEA_IR_MODE
-    bool use_sea = Runtime::Current()->IsSeaIRMode();
-    use_sea = use_sea &&
-        (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
+    bool use_sea = (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
     if (use_sea) {
       LOG(INFO) << "Using SEA IR to compile..." << std::endl;
-      return SeaIrCompileMethod(compiler,
-                                code_item,
+      return SeaIrCompileMethod(code_item,
                                 access_flags,
                                 invoke_type,
                                 class_def_idx,
@@ -68,11 +63,11 @@
 
 #ifdef ART_USE_PORTABLE_COMPILER
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -81,45 +76,45 @@
                                                  jobject class_loader,
                                                  const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file);
 
-extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& driver,
+extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver* driver,
                                                std::string const& filename);
 
 
-class LLVMCompiler : public Compiler {
+class LLVMCompiler FINAL : public Compiler {
  public:
-  LLVMCompiler() : Compiler(1000) {}
+  explicit LLVMCompiler(CompilerDriver* driver) : Compiler(driver, 1000) {}
 
-  void Init(CompilerDriver& driver) const {
-    ArtInitCompilerContext(driver);
+  void Init() const OVERRIDE {
+    ArtInitCompilerContext(GetCompilerDriver());
   }
 
-  void UnInit(CompilerDriver& driver) const {
-    ArtUnInitCompilerContext(driver);
+  void UnInit() const OVERRIDE {
+    ArtUnInitCompilerContext(GetCompilerDriver());
   }
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
                           uint32_t method_idx,
                           jobject class_loader,
-                          const DexFile& dex_file) const {
-    CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                                 code_item,
+                          const DexFile& dex_file) const OVERRIDE {
+    CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                  access_flags,
                                                  invoke_type,
                                                  class_def_idx,
                                                  method_idx,
                                                  class_loader,
                                                  dex_file);
-    if (method != nullptr) return method;
+    if (method != nullptr) {
+      return method;
+    }
 
-    return ArtCompileMethod(compiler,
+    return ArtCompileMethod(GetCompilerDriver(),
                             code_item,
                             access_flags,
                             invoke_type,
@@ -129,11 +124,10 @@
                             dex_file);
   }
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const {
-    return ArtLLVMJniCompileMethod(driver, access_flags, method_idx, dex_file);
+                             const DexFile& dex_file) const OVERRIDE {
+    return ArtLLVMJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
   }
 
   uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -182,17 +176,17 @@
 };
 #endif
 
-Compiler* Compiler::Create(Compiler::Kind kind) {
+Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) {
   switch (kind) {
     case kQuick:
-      return new QuickCompiler();
+      return new QuickCompiler(driver);
       break;
     case kOptimizing:
-      return new OptimizingCompiler();
+      return new OptimizingCompiler(driver);
       break;
     case kPortable:
 #ifdef ART_USE_PORTABLE_COMPILER
-      return new LLVMCompiler();
+      return new LLVMCompiler(driver);
 #else
       LOG(FATAL) << "Portable compiler not compiled";
 #endif
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 2357297..4caebf3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -41,18 +41,13 @@
     kPortable
   };
 
-  explicit Compiler(uint64_t warning)
-      : maximum_compilation_time_before_warning_(warning) {
-  }
+  static Compiler* Create(CompilerDriver* driver, Kind kind);
 
-  static Compiler* Create(Kind kind);
+  virtual void Init() const = 0;
 
-  virtual void Init(CompilerDriver& driver) const = 0;
+  virtual void UnInit() const = 0;
 
-  virtual void UnInit(CompilerDriver& driver) const = 0;
-
-  virtual CompiledMethod* Compile(CompilerDriver& driver,
-                                  const DexFile::CodeItem* code_item,
+  virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                                   uint32_t access_flags,
                                   InvokeType invoke_type,
                                   uint16_t class_def_idx,
@@ -60,8 +55,7 @@
                                   jobject class_loader,
                                   const DexFile& dex_file) const = 0;
 
-  static CompiledMethod* TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                             const art::DexFile::CodeItem* code_item,
+  static CompiledMethod* TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                              uint32_t access_flags,
                                              art::InvokeType invoke_type,
                                              uint16_t class_def_idx,
@@ -69,8 +63,7 @@
                                              jobject class_loader,
                                              const art::DexFile& dex_file);
 
-  virtual CompiledMethod* JniCompile(CompilerDriver& driver,
-                                     uint32_t access_flags,
+  virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
@@ -81,11 +74,10 @@
                         OatWriter* oat_writer,
                         const std::vector<const art::DexFile*>& dex_files,
                         const std::string& android_root,
-                        bool is_host, const CompilerDriver& driver) const
+                        bool is_host) const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  virtual Backend* GetCodeGenerator(CompilationUnit* cu,
-                                    void* compilation_unit) const = 0;
+  virtual Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const = 0;
 
   uint64_t GetMaximumCompilationTimeBeforeWarning() const {
     return maximum_compilation_time_before_warning_;
@@ -117,7 +109,17 @@
     return nullptr;
   }
 
+ protected:
+  explicit Compiler(CompilerDriver* driver, uint64_t warning) :
+      driver_(driver), maximum_compilation_time_before_warning_(warning) {
+  }
+
+  CompilerDriver* GetCompilerDriver() const {
+    return driver_;
+  }
+
  private:
+  CompilerDriver* const driver_;
   const uint64_t maximum_compilation_time_before_warning_;
 
   DISALLOW_COPY_AND_ASSIGN(Compiler);
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 1237e70..79a85db 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -22,9 +22,9 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver& driver,
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver* driver,
                                                       const art::DexFile::CodeItem* code_item,
                                                       uint32_t access_flags,
                                                       art::InvokeType invoke_type,
@@ -33,40 +33,40 @@
                                                       jobject class_loader,
                                                       const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* driver,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file);
 
 // Hack for CFI CIE initialization
 extern std::vector<uint8_t>* X86CFIInitialization();
 
-void QuickCompiler::Init(CompilerDriver& driver) const {
-  ArtInitQuickCompilerContext(driver);
+void QuickCompiler::Init() const {
+  ArtInitQuickCompilerContext(GetCompilerDriver());
 }
 
-void QuickCompiler::UnInit(CompilerDriver& driver) const {
-  ArtUnInitQuickCompilerContext(driver);
+void QuickCompiler::UnInit() const {
+  ArtUnInitQuickCompilerContext(GetCompilerDriver());
 }
 
-CompiledMethod* QuickCompiler::Compile(CompilerDriver& driver,
-                                      const DexFile::CodeItem* code_item,
-                                      uint32_t access_flags,
-                                      InvokeType invoke_type,
-                                      uint16_t class_def_idx,
-                                      uint32_t method_idx,
-                                      jobject class_loader,
-                                      const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                               code_item,
+CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
+                                       uint32_t access_flags,
+                                       InvokeType invoke_type,
+                                       uint16_t class_def_idx,
+                                       uint32_t method_idx,
+                                       jobject class_loader,
+                                       const DexFile& dex_file) const {
+  CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                access_flags,
                                                invoke_type,
                                                class_def_idx,
                                                method_idx,
                                                class_loader,
                                                dex_file);
-  if (method != nullptr) return method;
+  if (method != nullptr) {
+    return method;
+  }
 
-  return ArtQuickCompileMethod(driver,
+  return ArtQuickCompileMethod(GetCompilerDriver(),
                                code_item,
                                access_flags,
                                invoke_type,
@@ -76,11 +76,10 @@
                                dex_file);
 }
 
-CompiledMethod* QuickCompiler::JniCompile(CompilerDriver& driver,
-                                          uint32_t access_flags,
+CompiledMethod* QuickCompiler::JniCompile(uint32_t access_flags,
                                           uint32_t method_idx,
                                           const DexFile& dex_file) const {
-  return ArtQuickJniCompileMethod(driver, access_flags, method_idx, dex_file);
+  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
 }
 
 uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -88,11 +87,12 @@
 }
 
 bool QuickCompiler::WriteElf(art::File* file,
-                            OatWriter* oat_writer,
-                            const std::vector<const art::DexFile*>& dex_files,
-                            const std::string& android_root,
-                            bool is_host, const CompilerDriver& driver) const {
-  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host, driver);
+                             OatWriter* oat_writer,
+                             const std::vector<const art::DexFile*>& dex_files,
+                             const std::string& android_root,
+                             bool is_host) const {
+  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host,
+                                     *GetCompilerDriver());
 }
 
 Backend* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
@@ -101,6 +101,9 @@
     case kThumb2:
       mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+    case kArm64:
+      mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      break;
     case kMips:
       mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
@@ -134,22 +137,21 @@
   return nullptr;
 }
 
-CompiledMethod* OptimizingCompiler::Compile(CompilerDriver& driver,
-                                            const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
                                             uint16_t class_def_idx,
                                             uint32_t method_idx,
                                             jobject class_loader,
                                             const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
-  if (method != nullptr) return method;
+  CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                                      method_idx, class_loader, dex_file);
+  if (method != nullptr) {
+    return method;
+  }
 
-  return QuickCompiler::Compile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
+  return QuickCompiler::Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
+                                class_loader, dex_file);
 }
 
 }  // namespace art
diff --git a/compiler/compilers.h b/compiler/compilers.h
index 255dd23..3ca78c9 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -23,14 +23,13 @@
 
 class QuickCompiler : public Compiler {
  public:
-  QuickCompiler() : Compiler(100) {}
+  explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}
 
-  void Init(CompilerDriver& driver) const OVERRIDE;
+  void Init() const OVERRIDE;
 
-  void UnInit(CompilerDriver& driver) const OVERRIDE;
+  void UnInit() const OVERRIDE;
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -38,8 +37,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE;
 
@@ -50,7 +48,7 @@
                 OatWriter* oat_writer,
                 const std::vector<const art::DexFile*>& dex_files,
                 const std::string& android_root,
-                bool is_host, const CompilerDriver& driver) const
+                bool is_host) const
     OVERRIDE
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -73,12 +71,11 @@
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
 };
 
-class OptimizingCompiler : public QuickCompiler {
+class OptimizingCompiler FINAL : public QuickCompiler {
  public:
-  OptimizingCompiler() { }
+  explicit OptimizingCompiler(CompilerDriver* driver) : QuickCompiler(driver) { }
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -86,8 +83,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* TryCompile(CompilerDriver& driver,
-                             const DexFile::CodeItem* code_item,
+  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
                              uint32_t access_flags,
                              InvokeType invoke_type,
                              uint16_t class_def_idx,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 1a9379a..89c642d 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -30,12 +30,12 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
 /* Default optimizer/debug setting for the compiler. */
@@ -131,6 +131,370 @@
   }
 }
 
+// TODO: Remove this when we are able to compile everything.
+int arm64_support_list[] = {
+    Instruction::NOP,
+    // Instruction::MOVE,
+    // Instruction::MOVE_FROM16,
+    // Instruction::MOVE_16,
+    // Instruction::MOVE_WIDE,
+    // Instruction::MOVE_WIDE_FROM16,
+    // Instruction::MOVE_WIDE_16,
+    // Instruction::MOVE_OBJECT,
+    // Instruction::MOVE_OBJECT_FROM16,
+    // Instruction::MOVE_OBJECT_16,
+    // Instruction::MOVE_RESULT,
+    // Instruction::MOVE_RESULT_WIDE,
+    // Instruction::MOVE_RESULT_OBJECT,
+    // Instruction::MOVE_EXCEPTION,
+    // Instruction::RETURN_VOID,
+    // Instruction::RETURN,
+    // Instruction::RETURN_WIDE,
+    // Instruction::RETURN_OBJECT,
+    // Instruction::CONST_4,
+    // Instruction::CONST_16,
+    // Instruction::CONST,
+    // Instruction::CONST_HIGH16,
+    // Instruction::CONST_WIDE_16,
+    // Instruction::CONST_WIDE_32,
+    // Instruction::CONST_WIDE,
+    // Instruction::CONST_WIDE_HIGH16,
+    // Instruction::CONST_STRING,
+    // Instruction::CONST_STRING_JUMBO,
+    // Instruction::CONST_CLASS,
+    // Instruction::MONITOR_ENTER,
+    // Instruction::MONITOR_EXIT,
+    // Instruction::CHECK_CAST,
+    // Instruction::INSTANCE_OF,
+    // Instruction::ARRAY_LENGTH,
+    // Instruction::NEW_INSTANCE,
+    // Instruction::NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY_RANGE,
+    // Instruction::FILL_ARRAY_DATA,
+    // Instruction::THROW,
+    // Instruction::GOTO,
+    // Instruction::GOTO_16,
+    // Instruction::GOTO_32,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
+    // Instruction::CMPL_FLOAT,
+    // Instruction::CMPG_FLOAT,
+    // Instruction::CMPL_DOUBLE,
+    // Instruction::CMPG_DOUBLE,
+    // Instruction::CMP_LONG,
+    // Instruction::IF_EQ,
+    // Instruction::IF_NE,
+    // Instruction::IF_LT,
+    // Instruction::IF_GE,
+    // Instruction::IF_GT,
+    // Instruction::IF_LE,
+    // Instruction::IF_EQZ,
+    // Instruction::IF_NEZ,
+    // Instruction::IF_LTZ,
+    // Instruction::IF_GEZ,
+    // Instruction::IF_GTZ,
+    // Instruction::IF_LEZ,
+    // Instruction::UNUSED_3E,
+    // Instruction::UNUSED_3F,
+    // Instruction::UNUSED_40,
+    // Instruction::UNUSED_41,
+    // Instruction::UNUSED_42,
+    // Instruction::UNUSED_43,
+    // Instruction::AGET,
+    // Instruction::AGET_WIDE,
+    // Instruction::AGET_OBJECT,
+    // Instruction::AGET_BOOLEAN,
+    // Instruction::AGET_BYTE,
+    // Instruction::AGET_CHAR,
+    // Instruction::AGET_SHORT,
+    // Instruction::APUT,
+    // Instruction::APUT_WIDE,
+    // Instruction::APUT_OBJECT,
+    // Instruction::APUT_BOOLEAN,
+    // Instruction::APUT_BYTE,
+    // Instruction::APUT_CHAR,
+    // Instruction::APUT_SHORT,
+    // Instruction::IGET,
+    // Instruction::IGET_WIDE,
+    // Instruction::IGET_OBJECT,
+    // Instruction::IGET_BOOLEAN,
+    // Instruction::IGET_BYTE,
+    // Instruction::IGET_CHAR,
+    // Instruction::IGET_SHORT,
+    // Instruction::IPUT,
+    // Instruction::IPUT_WIDE,
+    // Instruction::IPUT_OBJECT,
+    // Instruction::IPUT_BOOLEAN,
+    // Instruction::IPUT_BYTE,
+    // Instruction::IPUT_CHAR,
+    // Instruction::IPUT_SHORT,
+    // Instruction::SGET,
+    // Instruction::SGET_WIDE,
+    // Instruction::SGET_OBJECT,
+    // Instruction::SGET_BOOLEAN,
+    // Instruction::SGET_BYTE,
+    // Instruction::SGET_CHAR,
+    // Instruction::SGET_SHORT,
+    // Instruction::SPUT,
+    // Instruction::SPUT_WIDE,
+    // Instruction::SPUT_OBJECT,
+    // Instruction::SPUT_BOOLEAN,
+    // Instruction::SPUT_BYTE,
+    // Instruction::SPUT_CHAR,
+    // Instruction::SPUT_SHORT,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    // Instruction::RETURN_VOID_BARRIER,
+    // Instruction::INVOKE_VIRTUAL_RANGE,
+    // Instruction::INVOKE_SUPER_RANGE,
+    // Instruction::INVOKE_DIRECT_RANGE,
+    // Instruction::INVOKE_STATIC_RANGE,
+    // Instruction::INVOKE_INTERFACE_RANGE,
+    // Instruction::UNUSED_79,
+    // Instruction::UNUSED_7A,
+    // Instruction::NEG_INT,
+    // Instruction::NOT_INT,
+    // Instruction::NEG_LONG,
+    // Instruction::NOT_LONG,
+    // Instruction::NEG_FLOAT,
+    // Instruction::NEG_DOUBLE,
+    // Instruction::INT_TO_LONG,
+    // Instruction::INT_TO_FLOAT,
+    // Instruction::INT_TO_DOUBLE,
+    // Instruction::LONG_TO_INT,
+    // Instruction::LONG_TO_FLOAT,
+    // Instruction::LONG_TO_DOUBLE,
+    // Instruction::FLOAT_TO_INT,
+    // Instruction::FLOAT_TO_LONG,
+    // Instruction::FLOAT_TO_DOUBLE,
+    // Instruction::DOUBLE_TO_INT,
+    // Instruction::DOUBLE_TO_LONG,
+    // Instruction::DOUBLE_TO_FLOAT,
+    // Instruction::INT_TO_BYTE,
+    // Instruction::INT_TO_CHAR,
+    // Instruction::INT_TO_SHORT,
+    // Instruction::ADD_INT,
+    // Instruction::SUB_INT,
+    // Instruction::MUL_INT,
+    // Instruction::DIV_INT,
+    // Instruction::REM_INT,
+    // Instruction::AND_INT,
+    // Instruction::OR_INT,
+    // Instruction::XOR_INT,
+    // Instruction::SHL_INT,
+    // Instruction::SHR_INT,
+    // Instruction::USHR_INT,
+    // Instruction::ADD_LONG,
+    // Instruction::SUB_LONG,
+    // Instruction::MUL_LONG,
+    // Instruction::DIV_LONG,
+    // Instruction::REM_LONG,
+    // Instruction::AND_LONG,
+    // Instruction::OR_LONG,
+    // Instruction::XOR_LONG,
+    // Instruction::SHL_LONG,
+    // Instruction::SHR_LONG,
+    // Instruction::USHR_LONG,
+    // Instruction::ADD_FLOAT,
+    // Instruction::SUB_FLOAT,
+    // Instruction::MUL_FLOAT,
+    // Instruction::DIV_FLOAT,
+    // Instruction::REM_FLOAT,
+    // Instruction::ADD_DOUBLE,
+    // Instruction::SUB_DOUBLE,
+    // Instruction::MUL_DOUBLE,
+    // Instruction::DIV_DOUBLE,
+    // Instruction::REM_DOUBLE,
+    // Instruction::ADD_INT_2ADDR,
+    // Instruction::SUB_INT_2ADDR,
+    // Instruction::MUL_INT_2ADDR,
+    // Instruction::DIV_INT_2ADDR,
+    // Instruction::REM_INT_2ADDR,
+    // Instruction::AND_INT_2ADDR,
+    // Instruction::OR_INT_2ADDR,
+    // Instruction::XOR_INT_2ADDR,
+    // Instruction::SHL_INT_2ADDR,
+    // Instruction::SHR_INT_2ADDR,
+    // Instruction::USHR_INT_2ADDR,
+    // Instruction::ADD_LONG_2ADDR,
+    // Instruction::SUB_LONG_2ADDR,
+    // Instruction::MUL_LONG_2ADDR,
+    // Instruction::DIV_LONG_2ADDR,
+    // Instruction::REM_LONG_2ADDR,
+    // Instruction::AND_LONG_2ADDR,
+    // Instruction::OR_LONG_2ADDR,
+    // Instruction::XOR_LONG_2ADDR,
+    // Instruction::SHL_LONG_2ADDR,
+    // Instruction::SHR_LONG_2ADDR,
+    // Instruction::USHR_LONG_2ADDR,
+    // Instruction::ADD_FLOAT_2ADDR,
+    // Instruction::SUB_FLOAT_2ADDR,
+    // Instruction::MUL_FLOAT_2ADDR,
+    // Instruction::DIV_FLOAT_2ADDR,
+    // Instruction::REM_FLOAT_2ADDR,
+    // Instruction::ADD_DOUBLE_2ADDR,
+    // Instruction::SUB_DOUBLE_2ADDR,
+    // Instruction::MUL_DOUBLE_2ADDR,
+    // Instruction::DIV_DOUBLE_2ADDR,
+    // Instruction::REM_DOUBLE_2ADDR,
+    // Instruction::ADD_INT_LIT16,
+    // Instruction::RSUB_INT,
+    // Instruction::MUL_INT_LIT16,
+    // Instruction::DIV_INT_LIT16,
+    // Instruction::REM_INT_LIT16,
+    // Instruction::AND_INT_LIT16,
+    // Instruction::OR_INT_LIT16,
+    // Instruction::XOR_INT_LIT16,
+    // Instruction::ADD_INT_LIT8,
+    // Instruction::RSUB_INT_LIT8,
+    // Instruction::MUL_INT_LIT8,
+    // Instruction::DIV_INT_LIT8,
+    // Instruction::REM_INT_LIT8,
+    // Instruction::AND_INT_LIT8,
+    // Instruction::OR_INT_LIT8,
+    // Instruction::XOR_INT_LIT8,
+    // Instruction::SHL_INT_LIT8,
+    // Instruction::SHR_INT_LIT8,
+    // Instruction::USHR_INT_LIT8,
+    // Instruction::IGET_QUICK,
+    // Instruction::IGET_WIDE_QUICK,
+    // Instruction::IGET_OBJECT_QUICK,
+    // Instruction::IPUT_QUICK,
+    // Instruction::IPUT_WIDE_QUICK,
+    // Instruction::IPUT_OBJECT_QUICK,
+    // Instruction::INVOKE_VIRTUAL_QUICK,
+    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+    // Instruction::UNUSED_EB,
+    // Instruction::UNUSED_EC,
+    // Instruction::UNUSED_ED,
+    // Instruction::UNUSED_EE,
+    // Instruction::UNUSED_EF,
+    // Instruction::UNUSED_F0,
+    // Instruction::UNUSED_F1,
+    // Instruction::UNUSED_F2,
+    // Instruction::UNUSED_F3,
+    // Instruction::UNUSED_F4,
+    // Instruction::UNUSED_F5,
+    // Instruction::UNUSED_F6,
+    // Instruction::UNUSED_F7,
+    // Instruction::UNUSED_F8,
+    // Instruction::UNUSED_F9,
+    // Instruction::UNUSED_FA,
+    // Instruction::UNUSED_FB,
+    // Instruction::UNUSED_FC,
+    // Instruction::UNUSED_FD,
+    // Instruction::UNUSED_FE,
+    // Instruction::UNUSED_FF,
+
+    // ----- ExtendedMIROpcode -----
+    // kMirOpPhi,
+    // kMirOpCopy,
+    // kMirOpFusedCmplFloat,
+    // kMirOpFusedCmpgFloat,
+    // kMirOpFusedCmplDouble,
+    // kMirOpFusedCmpgDouble,
+    // kMirOpFusedCmpLong,
+    // kMirOpNop,
+    // kMirOpNullCheck,
+    // kMirOpRangeCheck,
+    // kMirOpDivZeroCheck,
+    // kMirOpCheck,
+    // kMirOpCheckPart2,
+    // kMirOpSelect,
+    // kMirOpLast,
+};
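+// The list above is scanned linearly (std::find in CanCompileMethod() below); only the
+// uncommented opcodes are currently accepted on ARM64.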
+
+// TODO: Remove this when we are able to compile everything.
+static bool CanCompileShorty(const char* shorty) {
+  uint32_t shorty_size = strlen(shorty);
+  CHECK_GE(shorty_size, 1u);
+  // Set a limit on the maximum number of parameters.
+  // Note: there is an implied "method*" parameter, and probably "this" as well.
+  // 1 is for the return type. Currently, we accept at most 2 parameters.
+  if (shorty_size > (1 + 2)) {
+    return false;
+  }
+  // Z : boolean
+  // B : byte
+  // S : short
+  // C : char
+  // I : int
+    // J : long
+  // F : float
+  // D : double
+  // L : reference(object, array)
+  // V : void
+  // The current calling convention only supports 32-bit softfp,
+  // which has problems with long, float and double.
+  constexpr char supported_types[] = "ZBSCILV";
+  for (uint32_t i = 0; i < shorty_size; i++) {
+    if (strchr(supported_types, shorty[i]) == nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
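+// For illustration, under the rules above a shorty such as "VI" (void f(int)) or
+// "ZLI" (boolean f(Object, int)) is accepted, while "JI" (long return value) or
+// "VIII" (three parameters) is rejected.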
+
+// TODO: Remove this when we are able to compile everything.
+// Skip methods that we do not currently support.
+static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+                             CompilationUnit& cu) {
+  // There are some limitations in the current ARM64 backend.
+  if (cu.instruction_set == kArm64) {
+    // Check if we can compile the prototype.
+    const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
+    if (!CanCompileShorty(shorty)) {
+      VLOG(compiler) << "Unsupported shorty: " << shorty;
+      return false;
+    }
+
+    for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+      BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
+      if (bb == nullptr) continue;
+      if (bb->block_type == kDead) continue;
+      for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+        int opcode = mir->dalvikInsn.opcode;
+        // Check if we support the bytecode.
+        if (std::find(arm64_support_list, arm64_support_list + arraysize(arm64_support_list),
+            opcode) == arm64_support_list + arraysize(arm64_support_list)) {
+          if (opcode < kMirOpFirst) {
+            VLOG(compiler) << "Unsupported dalvik bytecode: "
+                           << mir->dalvikInsn.opcode;
+          } else {
+            VLOG(compiler) << "Unsupported extended MIR opcode: "
+                           << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
+          }
+          return false;
+        }
+        // Check if it invokes a prototype that we cannot support.
+        if (Instruction::INVOKE_VIRTUAL == opcode ||
+            Instruction::INVOKE_SUPER == opcode ||
+            Instruction::INVOKE_DIRECT == opcode ||
+            Instruction::INVOKE_STATIC == opcode ||
+            Instruction::INVOKE_INTERFACE == opcode) {
+          uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+          const char* invoke_method_shorty = dex_file.GetMethodShorty(
+              dex_file.GetMethodId(invoke_method_idx));
+          if (!CanCompileShorty(invoke_method_shorty)) {
+            VLOG(compiler) << "Unsupported invocation of '"
+                           << PrettyMethod(invoke_method_idx, dex_file)
+                           << "' with shorty: " << invoke_method_shorty;
+            return false;
+          }
+        }
+      }
+    }
+
+    LOG(INFO) << "Using experimental instruction set A64 for "
+              << PrettyMethod(method_idx, dex_file);
+  }
+  return true;
+}
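+// CanCompileMethod() is invoked from CompileMethod() right after InlineMethod() (see the
+// hunk further below); for rejected methods CompileMethod() simply returns nullptr.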
+
 static CompiledMethod* CompileMethod(CompilerDriver& driver,
                                      Compiler* compiler,
                                      const DexFile::CodeItem* code_item,
@@ -162,6 +526,7 @@
   cu.compiler = compiler;
   // TODO: x86_64 & arm64 are not yet implemented.
   CHECK((cu.instruction_set == kThumb2) ||
+        (cu.instruction_set == kArm64) ||
         (cu.instruction_set == kX86) ||
         (cu.instruction_set == kX86_64) ||
         (cu.instruction_set == kMips));
@@ -214,6 +579,11 @@
         (1 << kPromoteCompilerTemps));
   }
 
+  if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
+  }
+
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
 
@@ -241,6 +611,12 @@
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
+  // TODO(Arm64): Remove this when we are able to compile everything.
+  if (!CanCompileMethod(method_idx, dex_file, cu)) {
+    VLOG(compiler) << "Cannot compile method: " << PrettyMethod(method_idx, dex_file);
+    return nullptr;
+  }
+
   cu.NewTimingSplit("MIROpt:CheckFilters");
   if (cu.mir_graph->SkipCompilation()) {
     return NULL;
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 2f17e08..ed7e1f5 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -925,11 +925,17 @@
 int MIRGraph::AddNewSReg(int v_reg) {
   // Compiler temps always have a subscript of 0
   int subscript = (v_reg < 0) ? 0 : ++ssa_last_defs_[v_reg];
-  int ssa_reg = GetNumSSARegs();
+  uint32_t ssa_reg = GetNumSSARegs();
   SetNumSSARegs(ssa_reg + 1);
   ssa_base_vregs_->Insert(v_reg);
   ssa_subscripts_->Insert(subscript);
   DCHECK_EQ(ssa_base_vregs_->Size(), ssa_subscripts_->Size());
+  // If we are expanding very late (the use counts already cover every existing SSA reg),
+  // keep them in sync by adding zero entries for the new register.
+  if (ssa_reg > 0 && use_counts_.Size() == ssa_reg) {
+    // Need to expand the counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
   return ssa_reg;
 }
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 30d0bc3..ca90a83 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1257,4 +1257,55 @@
   DoDFSPreOrderSSARename(GetEntryBlock());
 }
 
+ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
+    : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
+      visited_taken_(false), have_successors_(false) {
+  // Check if we actually do have successors.
+  if (basic_block_ != nullptr && basic_block_->successor_block_list_type != kNotUsed) {
+    have_successors_ = true;
+    successor_iter_.Reset(basic_block_->successor_blocks);
+  }
+}
+
+BasicBlock* ChildBlockIterator::Next() {
+  // If we do not have a basic block, we cannot get the next child.
+  if (basic_block_ == nullptr) {
+    return nullptr;
+  }
+
+  // If we haven't visited the fall-through yet, try to return it.
+  if (!visited_fallthrough_) {
+    visited_fallthrough_ = true;
+
+    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->fall_through);
+    if (result != nullptr) {
+      return result;
+    }
+  }
+
+  // If we haven't visited the taken branch yet, try to return it.
+  if (!visited_taken_) {
+    visited_taken_ = true;
+
+    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->taken);
+    if (result != nullptr) {
+      return result;
+    }
+  }
+
+  // We visited both taken and fallthrough. Now check if we have successors we need to visit.
+  if (have_successors_) {
+    // Get information about the next successor block.
+    SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
+
+    // If there is another successor, return its block.
+    // Otherwise we fall through and return nullptr below.
+    if (successor_block_info != nullptr) {
+      return mir_graph_->GetBasicBlock(successor_block_info->block);
+    }
+  }
+
+  // We do not have anything left to visit.
+  return nullptr;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index c728d84..85a2d04 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -341,6 +341,29 @@
   int key;
 };
 
+/**
+ * @class ChildBlockIterator
+ * @brief Enables easy iteration over a basic block's children.
+ */
+class ChildBlockIterator {
+ public:
+  /**
+   * @brief Constructs a child iterator.
+   * @param bb The basic block whose children we need to iterate through.
+   * @param mir_graph The MIRGraph used to get the basic block during iteration.
+   */
+  ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph);
+  BasicBlock* Next();
+
+ private:
+  BasicBlock* basic_block_;
+  MIRGraph* mir_graph_;
+  bool visited_fallthrough_;
+  bool visited_taken_;
+  bool have_successors_;
+  GrowableArray<SuccessorBlockInfo*>::Iterator successor_iter_;
+};
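+// Typical usage (illustrative sketch; no caller is added in this change):
+//   ChildBlockIterator iter(bb, mir_graph);
+//   for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
+//     // visit child
+//   }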
+
 /*
  * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes
  * the type of an SSA name (and, can also be used by code generators to record where the
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5c1bdf4..413b4e0 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -286,10 +286,6 @@
         reg_location_[ssa_reg_high].high_word = 1;
         reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
         reg_location_[ssa_reg_high].wide = true;
-
-        // A new SSA needs new use counts.
-        use_counts_.Insert(0);
-        raw_use_counts_.Insert(0);
       }
 
       num_non_special_compiler_temps_++;
@@ -302,10 +298,6 @@
     reg_location_[ssa_reg_low] = temp_loc;
     reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
     reg_location_[ssa_reg_low].wide = wide;
-
-    // A new SSA needs new use counts.
-    use_counts_.Insert(0);
-    raw_use_counts_.Insert(0);
   }
 
   compiler_temps_.Insert(compiler_temp);
@@ -743,18 +735,20 @@
       if (pred_bb->block_type == kDalvikByteCode) {
         // Check to see if predecessor had an explicit null-check.
         MIR* last_insn = pred_bb->last_mir_insn;
-        Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
-        if (last_opcode == Instruction::IF_EQZ) {
-          if (pred_bb->fall_through == bb->id) {
-            // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
-            // it can't be null.
-            ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
-          }
-        } else if (last_opcode == Instruction::IF_NEZ) {
-          if (pred_bb->taken == bb->id) {
-            // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
-            // null.
-            ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+        if (last_insn != nullptr) {
+          Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
+          if (last_opcode == Instruction::IF_EQZ) {
+            if (pred_bb->fall_through == bb->id) {
+              // This is the fall-through of a block ending in IF_EQZ, so the vA of the
+              // IF_EQZ can't be null here.
+              ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+            }
+          } else if (last_opcode == Instruction::IF_NEZ) {
+            if (pred_bb->taken == bb->id) {
+              // This is the taken branch of a block ending in IF_NEZ, so the vA of the
+              // IF_NEZ can't be null here.
+              ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+            }
           }
         }
       }
@@ -903,7 +897,7 @@
           temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapNullCheck);
       nce_changed = ssa_regs_to_check->GetHighestBitSet() != -1;
       bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
-    } else if (!ssa_regs_to_check->Equal(bb->data_flow_info->ending_check_v)) {
+    } else if (!ssa_regs_to_check->SameBitsSet(bb->data_flow_info->ending_check_v)) {
       nce_changed = true;
       bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
     }
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index cac766d..a895e6e 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1213,7 +1213,7 @@
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
   CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  data_offset_ = RoundUp(starting_offset, 4);
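+  // Note: RoundUp(x, 4) == (x + 0x3) & ~0x3, e.g. 6 -> 8 and 8 -> 8.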
   int32_t offset_adjustment;
   AssignDataOffsets();
 
@@ -1596,7 +1596,7 @@
         LOG(FATAL) << "Assembler error - too many retries";
       }
       starting_offset += offset_adjustment;
-      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      data_offset_ = RoundUp(starting_offset, 4);
       AssignDataOffsets();
     }
   }
@@ -1609,7 +1609,7 @@
   write_pos = EncodeLIRs(write_pos, first_lir_insn_);
   DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+  DCHECK_EQ(data_offset_, RoundUp(code_buffer_.size(), 4));
 
   // Install literals
   InstallLiteralPools();
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 163c0fe..d3477c9 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -360,6 +360,22 @@
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
       Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      //
+      // This is done before the callee-save instructions to avoid any possibility of
+      // the spills themselves overflowing the stack.  r12 is used because it is never
+      // a callee-saved register.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      Load32Disp(rs_r12, 0, rs_r12);
+      MarkPossibleStackOverflowException();
     }
   }
   /* Spill core callee saves */
@@ -418,17 +434,8 @@
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
       }
     } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
+      // Implicit stack overflow check has already been done.  Just make room on the
+      // stack for the frame now.
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
   } else {
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 9d1723a..aab6b46 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -32,21 +32,20 @@
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
@@ -173,8 +172,7 @@
     void OpRegCopyWide(RegStorage dest, RegStorage src);
     void OpTlsCmp(ThreadOffset<4> offset, int val);
 
-    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                          int s_reg);
+    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 8391c03..0948ce3 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -692,7 +692,7 @@
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
     StoreValue(rl_dest, rl_result);
   }
   return true;
@@ -1170,19 +1170,14 @@
       }
       FreeTemp(reg_len);
     }
+    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+    if (!constant_index) {
+      FreeTemp(reg_ptr);
+    }
     if (rl_dest.wide) {
-      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG);
-      MarkPossibleNullPointerException(opt_flags);
-      if (!constant_index) {
-        FreeTemp(reg_ptr);
-      }
       StoreValueWide(rl_dest, rl_result);
     } else {
-      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
-      MarkPossibleNullPointerException(opt_flags);
-      if (!constant_index) {
-        FreeTemp(reg_ptr);
-      }
       StoreValue(rl_dest, rl_result);
     }
   } else {
@@ -1275,11 +1270,7 @@
       FreeTemp(reg_len);
     }
 
-    if (rl_src.wide) {
-      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg);
-    } else {
-      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
-    }
+    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     /* reg_ptr -> array data */
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 08acef7..1745c18 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -825,7 +825,7 @@
  * performing null check, incoming MIR can be null.
  */
 LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  OpSize size, int s_reg) {
+                                  OpSize size) {
   LIR* load = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
@@ -833,30 +833,32 @@
   bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
   int encoded_disp = displacement;
   bool already_generated = false;
-  bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64:
+    case k64: {
+      DCHECK_EQ(displacement & 3, 0);
+      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vldrd/kThumb2LdrdI8.
+      RegStorage r_ptr = r_base;
+      if ((displacement & ~1020) != 0) {
+        // For core register load, use the r_dest.GetLow() for the temporary pointer.
+        r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow();
+        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
+        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
+      }
       if (r_dest.IsFloat()) {
         DCHECK(!r_dest.IsPair());
-        opcode = kThumb2Vldrd;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+        load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
       } else {
-        if (displacement <= 1020) {
-          load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
-                         displacement >> 2);
-        } else {
-          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
-          null_pointer_safepoint = true;
-          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
-        }
-        already_generated = true;
+        load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
+                       encoded_disp);
       }
+      if ((displacement & ~1020) != 0 && !r_dest.IsFloat()) {
+        FreeTemp(r_ptr);
+      }
+      already_generated = true;
       break;
+    }
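+    // Illustrative note on the displacement split in the k64/kDouble case above (hypothetical
+    // value): displacement == 4660 (0x1234, word-aligned) gives displacement & ~1020 == 4096,
+    // added to the base in a single instruction, and displacement & 1020 == 564, encoded as
+    // 564 >> 2 == 141 for kThumb2LdrdI8/kThumb2Vldrd.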
     case kSingle:
     // Intentional fall-though.
     case k32:
@@ -935,7 +937,7 @@
       if (r_dest.IsFloat()) {
         // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
         OpRegReg(kOpAdd, reg_offset, r_base);
-        load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg);
+        load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
       } else {
         load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
       }
@@ -946,28 +948,16 @@
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-  } else {
-     // We might need to generate a safepoint if we have two store instructions (wide or double).
-     if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-       MarkSafepointPC(load);
-     }
   }
   return load;
 }
 
-LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                              int s_reg) {
-  DCHECK(!((size == k64) || (size == kDouble)));
+LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
-}
-
-LIR* ArmMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg);
+  return LoadBaseDispBody(r_base, displacement, r_dest, size);
 }
 
 
@@ -980,29 +970,31 @@
   bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
   int encoded_disp = displacement;
   bool already_generated = false;
-  bool null_pointer_safepoint = false;
   switch (size) {
-    case k64:
     case kDouble:
-      if (!r_src.IsFloat()) {
-        if (displacement <= 1020) {
-          store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
-                          displacement >> 2);
-        } else {
-          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
-          null_pointer_safepoint = true;
-          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
-        }
-        already_generated = true;
-      } else {
-        DCHECK(!r_src.IsPair());
-        opcode = kThumb2Vstrd;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+    // Intentional fall-through.
+    case k64: {
+      DCHECK_EQ(displacement & 3, 0);
+      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vstrd/kThumb2StrdI8.
+      RegStorage r_ptr = r_base;
+      if ((displacement & ~1020) != 0) {
+        r_ptr = AllocTemp();
+        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
+        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
       }
+      if (r_src.IsFloat()) {
+        DCHECK(!r_src.IsPair());
+        store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp);
+      } else {
+        store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(),
+                        encoded_disp);
+      }
+      if ((displacement & ~1020) != 0) {
+        FreeTemp(r_ptr);
+      }
+      already_generated = true;
       break;
+    }
     case kSingle:
     // Intentional fall-through.
     case k32:
@@ -1076,11 +1068,6 @@
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-  } else {
-    // We might need to generate a safepoint if we have two store instructions (wide or double).
-    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-      MarkSafepointPC(store);
-    }
   }
   return store;
 }
@@ -1091,14 +1078,9 @@
   if (size == kWord) {
     size = k32;
   }
-  DCHECK(!((size == k64) || (size == kDouble)));
   return StoreBaseDispBody(r_base, displacement, r_src, size);
 }
 
-LIR* ArmMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src, k64);
-}
-
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
   DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
@@ -1130,7 +1112,7 @@
 }
 
 LIR* ArmMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_src, OpSize size) {
   LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
   return NULL;
 }
@@ -1141,7 +1123,7 @@
 }
 
 LIR* ArmMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+                                     int displacement, RegStorage r_dest, OpSize size) {
   LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
   return NULL;
 }
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
new file mode 100644
index 0000000..452c8d7
--- /dev/null
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+/*
+ * TODO(Arm64): the comments below are outdated.
+ *
+ * Runtime register usage conventions.
+ *
+ * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
+ *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
+ *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
+ *        registers.
+ * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
+ * r4   : (rA64_SUSPEND) is reserved (suspend check/debugger assist)
+ * r5   : Callee save (promotion target)
+ * r6   : Callee save (promotion target)
+ * r7   : Callee save (promotion target)
+ * r8   : Callee save (promotion target)
+ * r9   : (rA64_SELF) is reserved (pointer to thread-local storage)
+ * r10  : Callee save (promotion target)
+ * r11  : Callee save (promotion target)
+ * r12  : Scratch, may be trashed by linkage stubs
+ * r13  : (sp) is reserved
+ * r14  : (lr) is reserved
+ * r15  : (pc) is reserved
+ *
+ * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
+ * 7 core registers that can be used for promotion
+ *
+ * Floating pointer registers
+ * s0-s31
+ * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ * s16-s31 (d8-d15) preserved across C calls
+ * s0-s15 (d0-d7) trashed across C calls
+ *
+ * s0-s15/d0-d7 used as codegen temp/scratch
+ * s16-s31/d8-d15 can be used for promotion.
+ *
+ * Calling convention
+ *     o On a call to a Dalvik method, pass target's Method* in r0
+ *     o r1-r3 will be used for up to the first 3 words of arguments
+ *     o Arguments past the first 3 words will be placed in appropriate
+ *       out slots by the caller.
+ *     o If a 64-bit argument would span the register/memory argument
+ *       boundary, it will instead be fully passed in the frame.
+ *     o Maintain a 16-byte stack alignment
+ *
+ *  Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1]              |  {Note: resides in caller's frame}
+ * |       .                |
+ * | IN[0]                  |
+ * | caller's Method*       |
+ * +========================+  {Note: start of callee's frame}
+ * | spill region           |  {variable sized - will include lr if non-leaf.}
+ * +------------------------+
+ * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
+ * +------------------------+
+ * | V[locals-1]            |
+ * | V[locals-2]            |
+ * |      .                 |
+ * |      .                 |
+ * | V[1]                   |
+ * | V[0]                   |
+ * +------------------------+
+ * |  0 to 3 words padding  |
+ * +------------------------+
+ * | OUT[outs-1]            |
+ * | OUT[outs-2]            |
+ * |       .                |
+ * | OUT[0]                 |
+ * | cur_method*            | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
+
+#if 1
+#define A64_PTR_SIZE 4
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#else
+// Not yet ready for this.
+#define A64_PTR_SIZE 8
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#endif
+
+#define A64_QUICK_ENTRYPOINT_OFFSET(name) QUICK_ENTRYPOINT_OFFSET(A64_PTR_SIZE, name)
+#define A64_QUICK_ENTRYPOINT_INT_OFFS(name) A64_GET_INT_OFFS(A64_QUICK_ENTRYPOINT_OFFSET(name))
+#define A64_THREAD_THIN_LOCK_ID_OFFSET A64_GET_INT_OFFS(Thread::ThinLockIdOffset<A64_PTR_SIZE>())
+#define A64_THREAD_EXCEPTION_INT_OFFS A64_GET_INT_OFFS(Thread::ExceptionOffset<A64_PTR_SIZE>())
+#define A64_THREAD_CARD_TABLE_INT_OFFS A64_GET_INT_OFFS(Thread::CardTableOffset<A64_PTR_SIZE>())
+#define A64_THREAD_STACK_END_INT_OFFS A64_GET_INT_OFFS(Thread::StackEndOffset<A64_PTR_SIZE>())
+#define A64_THREAD_SUSPEND_TRIGGER_OFFSET \
+  A64_GET_INT_OFFS(Thread::ThreadSuspendTriggerOffset<A64_PTR_SIZE>())
+typedef ThreadOffset<A64_PTR_SIZE> A64ThreadOffset;
+
+// Offset to distinguish FP regs.
+#define ARM_FP_REG_OFFSET 32
+// First FP callee save.
+#define ARM_FP_CALLEE_SAVE_BASE 16
+
+// Mask to strip off fp flags.
+#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1)
+
+// Temporary macros, used to mark code which wants to distinguish between zr/sp.
+#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
+#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
+
+enum ArmResourceEncodingPos {
+  kArmGPReg0   = 0,
+  kArmRegLR    = 30,
+  kArmRegSP    = 31,
+  kArmFPReg0   = 32,
+  kArmRegEnd   = 64,
+};
+
+#define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
+#define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
+
+#define IS_SIGNED_IMM(size, value) \
+  ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
+#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
+#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
+#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
+#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
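+// Illustration (derived from the definition above): IS_SIGNED_IMM9(value) holds for value in
+// [-256, 255], so IS_SIGNED_IMM9(-256) and IS_SIGNED_IMM9(255) are true while IS_SIGNED_IMM9(256)
+// is false.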
+
+// Quick macro used to define the registers.
+#define A64_REGISTER_CODE_LIST(R) \
+  R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7) \
+  R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15) \
+  R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+  R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
+
+// Registers (integer) values.
+// TODO(Arm64): for now we define rx##nr identically to rw##nr. We should rather define rx##nr as
+// a k64BitSolo. We should do this once the register allocator is ready.
+enum A64NativeRegisterPool {
+#  define A64_DEFINE_REGISTERS(nr) \
+    rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rx##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
+    rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
+  A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
+#undef A64_DEFINE_REGISTERS
+
+  // TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp?
+  //   This would be desirable to allow detecting usage-errors in the assembler.
+  rwzr = rw31,
+  rxzr = rx31,
+  rwsp = rw31,
+  rsp = rx31,
+  rA64_SUSPEND = rx4,
+  rA64_SELF = rx18,
+  rA64_SP = rx31,
+  rA64_LR = rx30
+};
+
+#define A64_DEFINE_REGSTORAGES(nr) \
+  constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \
+  constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \
+  constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \
+  constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr);
+A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
+#undef A64_DEFINE_REGSTORAGES
+
+constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
+constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND);
+constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
+constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
+constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
+
+// RegisterLocation templates return values (following the hard-float calling convention).
+const RegLocation arm_loc_c_return =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_double =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
+
+/**
+ * @brief Shift-type to be applied to a register via EncodeShift().
+ */
+enum A64ShiftEncodings {
+  kA64Lsl = 0x0,
+  kA64Lsr = 0x1,
+  kA64Asr = 0x2,
+  kA64Ror = 0x3
+};
+
+/**
+ * @brief Extend-type to be applied to a register via EncodeExtend().
+ */
+enum A64RegExtEncodings {
+  kA64Uxtb = 0x0,
+  kA64Uxth = 0x1,
+  kA64Uxtw = 0x2,
+  kA64Uxtx = 0x3,
+  kA64Sxtb = 0x4,
+  kA64Sxth = 0x5,
+  kA64Sxtw = 0x6,
+  kA64Sxtx = 0x7
+};
+
+#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
+
+/*
+ * The following enum defines the list of supported A64 instructions by the
+ * assembler. Their corresponding EncodingMap positions will be defined in
+ * assemble_arm64.cc.
+ */
+enum ArmOpcode {
+  kA64First = 0,
+  kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+  kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrd,       // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrr,       // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}".
+  kA64B2ct,          // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
+  kA64Blr1x,         // blr [1101011000111111000000] rn[9-5] [00000].
+  kA64Br1x,          // br  [1101011000011111000000] rn[9-5] [00000].
+  kA64Brk1d,         // brk [11010100001] imm_16[20-5] [00000].
+  kA64B1t,           // b   [00010100] offset_26[25-0].
+  kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
+  kA64Cbz2rt,        // cbz [00110100] imm_19[23-5] rt[4-0].
+  kA64Cmn3Rro,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmn3RdT,       // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Cmp3Rro,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
+  kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Csneg4rrrc,    // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Dmb1B,         // dmb [11010101000000110011] CRm[11-8] [10111111].
+  kA64Eor3Rrl,       // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Eor4rrro,      // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Extr4rrrd,     // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Fabs2ff,       // fabs[000111100s100000110000] rn[9-5] rd[4-0].
+  kA64Fadd3fff,      // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0].
+  kA64Fcmp1f,        // fcmp[000111100s100000001000] rn[9-5] [01000].
+  kA64Fcmp2ff,       // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000].
+  kA64Fcvtzs2wf,     // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvtzs2xf,     // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
+  kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
+  kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+  kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
+  kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
+  kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2Sx,       // fmov[1001111001100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2ws,       // fmov[0001111001101110000000] rn[9-5] rd[4-0].
+  kA64Fmov2xS,       // fmov[1001111001101111000000] rn[9-5] rd[4-0].
+  kA64Fmul3fff,      // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0].
+  kA64Fneg2ff,       // fneg[000111100s100001010000] rn[9-5] rd[4-0].
+  kA64Frintz2ff,     // frintz [000111100s100101110000] rn[9-5] rd[4-0].
+  kA64Fsqrt2ff,      // fsqrt[000111100s100001110000] rn[9-5] rd[4-0].
+  kA64Fsub3fff,      // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0].
+  kA64Ldrb3wXd,      // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrb3wXx,      // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXd,     // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXx,     // ldrsb[0011 1000 1s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrh3wXF,      // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrh4wXxd,     // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsh3rXF,     // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsh4rXxd,    // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldr2fp,        // ldr [0s011100] imm_19[23-5] rt[4-0].
+  kA64Ldr2rp,        // ldr [0s011000] imm_19[23-5] rt[4-0].
+  kA64Ldr3fXD,       // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldr3rXD,       // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldr4fXxG,      // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldr4rXxG,      // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64LdrPost3rXd,   // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldp4rrXD,      // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64LdpPost4rrXD,  // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldur3rXd,      // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldxr2rX,       // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
+  kA64Lsl3rrr,       // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
+  kA64Lsr3rrd,       // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
+  kA64Lsr3rrr,       // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
+  kA64Movk3rdM,      // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movn3rdM,      // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movz3rdM,      // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Mov2rr,        // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mvn2rr,        // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
+  kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
+  kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Ret,           // ret [11010110010111110000001111000000].
+  kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
+  kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
+  kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
+  kA64Sbc3rrr,       // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Sbfm4rrdd,     // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Scvtf2fw,      // scvtf  [000111100s100010000000] rn[9-5] rd[4-0].
+  kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
+  kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
+  kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+  kA64Stp4rrXD,      // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPre4rrXD,   // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Str3fXD,       // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4fXxG,      // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Str3rXD,       // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4rXxG,      // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0].
+  kA64Strb3wXd,      // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strb3wXx,      // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Strh3wXF,      // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strh4wXxd,     // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64StrPost3rXd,   // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Stur3fXd,      // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
+  kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
+  kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Last,
+  kA64NotWide = 0,   // Flag used to select the first instruction variant.
+  kA64Wide = 0x1000  // Flag used to select the second instruction variant.
+};
+
+/*
+ * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM"
+ * and "mov xN, xM" or - for floating point instructions - "mov sN, sM" and "mov dN, dM".
+ * It definitely makes sense to exploit these symmetries of the instruction set. We do this via the
+ * WIDE, UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by applying the
+ * WIDE macro to the non-wide opcode. E.g. WIDE(kA64Sub4RRdT).
+ */
+
+// Return the wide and no-wide variants of the given opcode.
+#define WIDE(op) ((ArmOpcode)((op) | kA64Wide))
+#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide))
+
+// Whether the given opcode is wide.
+#define IS_WIDE(op) (((op) & kA64Wide) != 0)
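+// Illustration (derived from the flag values above): UNWIDE(WIDE(kA64Sub4RRdT)) == kA64Sub4RRdT,
+// IS_WIDE(WIDE(kA64Sub4RRdT)) is true and IS_WIDE(kA64Sub4RRdT) is false.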
+
+/*
+ * Floating point variants. These are just aliases of the macros above which we use for floating
+ * point instructions, just for readability reasons.
+ * TODO(Arm64): should we remove these and use the original macros?
+ */
+#define FWIDE WIDE
+#define FUNWIDE UNWIDE
+#define IS_FWIDE IS_WIDE
+
+#define OP_KIND_UNWIDE(opcode) (opcode)
+#define OP_KIND_IS_WIDE(opcode) (false)
+
+enum ArmOpDmbOptions {
+  kSY = 0xf,
+  kST = 0xe,
+  kISH = 0xb,
+  kISHST = 0xa,
+  kNSH = 0x7,
+  kNSHST = 0x6
+};
+
+// Instruction assembly field_loc kind.
+enum ArmEncodingKind {
+  // All the formats below are encoded in the same way (as a kFmtBitBlt).
+  // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
+  kFmtRegW = 0,  // Word register (w) or wzr.
+  kFmtRegX,      // Extended word register (x) or xzr.
+  kFmtRegR,      // Register with same width as the instruction or zr.
+  kFmtRegWOrSp,  // Word register (w) or wsp.
+  kFmtRegXOrSp,  // Extended word register (x) or sp.
+  kFmtRegROrSp,  // Register with same width as the instruction or sp.
+  kFmtRegS,      // Single FP reg.
+  kFmtRegD,      // Double FP reg.
+  kFmtRegF,      // Single/double FP reg depending on the instruction width.
+  kFmtBitBlt,    // Bit string using end/start.
+
+  // Less likely formats.
+  kFmtUnused,    // Unused field and marks end of formats.
+  kFmtImm21,     // Sign-extended immediate using [23..5,30..29].
+  kFmtShift,     // Register shift, 9-bit at [23..21, 15..10].
+  kFmtExtend,    // Register extend, 9-bit at [23..21, 15..10].
+  kFmtSkip,      // Unused field, but continue to next.
+};
+
+// Struct used to define the snippet positions for each A64 opcode.
+struct ArmEncodingMap {
+  uint32_t wskeleton;
+  uint32_t xskeleton;
+  struct {
+    ArmEncodingKind kind;
+    int end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
+  } field_loc[4];
+  ArmOpcode opcode;  // can be WIDE()-ned to indicate it has a wide variant.
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  int size;          // Note: size is in bytes.
+  FixupKind fixup;
+};
+
+#if 0
+// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes).
+struct ArmEncodingMap {
+  uint32_t wskeleton;
+  uint32_t xskeleton;
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  struct {
+    uint8_t kind;
+    int8_t end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int8_t start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
+  } field_loc[4];
+  uint32_t fixup;
+  uint32_t opcode;         // can be WIDE()-ned to indicate it has a wide variant.
+  uint32_t padding[3];
+};
+#endif
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
new file mode 100644
index 0000000..8accd0a
--- /dev/null
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -0,0 +1,913 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+// The macros below are exclusively used in the encoding map.
+
+// Most generic way of providing two variants for one instruction.
+#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2
+
+// Used for instructions which do not have a wide variant.
+#define NO_VARIANTS(variant) \
+  CUSTOM_VARIANTS(variant, 0)
+
+// Used for instructions which have a wide variant with the sf bit set to 1.
+#define SF_VARIANTS(sf0_skeleton) \
+  CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000))
+
+// Used for instructions which have a wide variant with the size bits set to either x0 or x1.
+#define SIZE_VARIANTS(sizex0_skeleton) \
+  CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000))
+
+// Used for instructions which have a wide variant with the sf and n bits set to 1.
+#define SF_N_VARIANTS(sf0_n0_skeleton) \
+  CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000))
+
+// Used for FP instructions which have single and double precision variants, with the type bits set
+// to either 00 or 01.
+#define FLOAT_VARIANTS(type00_skeleton) \
+  CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000))
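+// Illustration (derived from the macros above): kA64Add4RRdT below uses SF_VARIANTS(0x11000000),
+// which expands to the skeleton pair (0x11000000, 0x91000000); the second skeleton is the 64-bit
+// ("sf" bit set) form of "add".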
+
+/*
+ * opcode: ArmOpcode enum
+ * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros.
+ * a{n}k: key to applying argument {n}    \
+ * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3
+ * a{n}e: argument {n} end bit position   /
+ * flags: instruction attributes (used in optimization)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ * fixup: used for second-pass fixes (e.g. address fixups in branch instructions).
+ */
+#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \
+                     a3k, a3s, a3e, flags, name, fmt, fixup) \
+        {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \
+                    {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup}
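+// Illustration (derived from the macro above): the kA64Adc3rrr entry below fills field_loc with
+// {kFmtRegR, 4, 0}, {kFmtRegR, 9, 5}, {kFmtRegR, 20, 16} and an unused fourth slot, while the
+// instruction size is fixed at 4 bytes by the macro itself.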
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ *     0 -> operands[0] (dest)
+ *     1 -> operands[1] (src1)
+ *     2 -> operands[2] (src2)
+ *     3 -> operands[3] (extra)
+ *
+ * [f]ormats:
+ *     d -> decimal
+ *     D -> decimal*4 or decimal*8 depending on the instruction width
+ *     E -> decimal*4
+ *     F -> decimal*2
+ *     G -> ", lsl #2" or ", lsl #3" depending on the instruction width
+ *     c -> branch condition (eq, ne, etc.)
+ *     t -> pc-relative target
+ *     p -> pc-relative address
+ *     s -> single precision floating point register
+ *     S -> double precision floating point register
+ *     f -> single or double precision register (depending on instruction width)
+ *     I -> 8-bit immediate floating point number
+ *     l -> logical immediate
+ *     M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
+ *     B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
+ *     H -> operand shift
+ *     T -> register shift (either ", lsl #0" or ", lsl #12")
+ *     e -> register extend (e.g. uxtb #1)
+ *     o -> register shift (e.g. lsl #1) for Word registers
+ *     w -> word (32-bit) register wn, or wzr
+ *     W -> word (32-bit) register wn, or wsp
+ *     x -> extended (64-bit) register xn, or xzr
+ *     X -> extended (64-bit) register xn, or sp
+ *     r -> register with same width as instruction, r31 -> wzr, xzr
+ *     R -> register with same width as instruction, r31 -> wsp, sp
+ *
+ *  [!] escape.  To insert "!", use "!!"
+ */
+/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */
+const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
+    ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "adc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "add", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
+                 "add", "!0r, !1r, !2r!3o", kFixupNone),
+    // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
+    //   fixups and contains information to identify the adr label.
+    ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
+                 kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
+                 "adr", "!0x, #!1d", kFixupAdr),
+    ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "and", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "and", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "asr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "asr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000),
+                 kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
+                 NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch),
+    ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
+                 "blr", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
+                 "br", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
+                 kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "brk", "!0d", kFixupNone),
+    ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000),
+                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
+                 "b", "!0t", kFixupT1Branch),
+    ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "cbnz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH  | NEEDS_FIXUP,
+                 "cbz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x6b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0R, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0R, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csel", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
+                 kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 "dmb", "#!0B", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "eor", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "eor", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12,
+                 "extr", "!0r, !1r, !2r, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP| REG_DEF0_USE1,
+                 "fabs", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fadd", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008),
+                 kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES,
+                 "fcmp", "!0f, #0", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000),
+                 kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "fcmp", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000),
+                 kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0w, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000),
+                 kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0x, !1f", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22C000),
+                 kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0S, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000),
+                 kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0s, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fdiv", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "fmov", "!0f, #!1I", kFixupNone),
+    ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000),
+                 kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0s, !1w", kFixupNone),
+    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000),
+                 kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0S, !1x", kFixupNone),
+    ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000),
+                 kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0w, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e6e0000),
+                 kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0x, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmul", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fneg", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "frintz", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fsqrt", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fsub", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0f, !1p", kFixupLoad),
+    ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0r, !1p", kFixupLoad),
+    ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldxr", "!0r, [!1X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsl", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
+                 "movk", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movn", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movz", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mvn", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "neg", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orr", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "orr", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0),
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 "ret", "", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
+                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0),
+                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev16", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "ror", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sbc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbfm4rrdd), SF_N_VARIANTS(0x13000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000),
+                 kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1w", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000),
+                 kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sdiv", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
+                 kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
+                 kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
+                 "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE,
+                 "str", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
+                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 "stxr", "!0w, !1r, [!2X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sub", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
+                 kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0r, !1R, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "ubfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
+};
+
+// new_lir replaces orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+  orig_lir->flags.fixup = kFixupNone;
+}
+
+// new_lir is inserted before orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+}
+
+/* Nop, used for aligning code. Nop is an alias for hint #0. */
+#define PADDING_NOP (UINT32_C(0xd503201f))
+
+uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+  for (; lir != nullptr; lir = NEXT_LIR(lir)) {
+    bool opcode_is_wide = IS_WIDE(lir->opcode);
+    ArmOpcode opcode = UNWIDE(lir->opcode);
+
+    if (UNLIKELY(IsPseudoLirOp(opcode))) {
+      continue;
+    }
+
+    if (LIKELY(!lir->flags.is_nop)) {
+      const ArmEncodingMap *encoder = &EncodingMap[opcode];
+
+      // Select the right variant of the skeleton.
+      uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+      DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode));
+
+      for (int i = 0; i < 4; i++) {
+        ArmEncodingKind kind = encoder->field_loc[i].kind;
+        uint32_t operand = lir->operands[i];
+        uint32_t value;
+
+        if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) {
+          // Note: this will handle kFmtReg* and kFmtBitBlt.
+
+          if (static_cast<unsigned>(kind) < kFmtBitBlt) {
+            bool is_zero = A64_REG_IS_ZR(operand);
+
+            if (kIsDebugBuild) {
+              // Register usage checks: First establish register usage requirements based on the
+              // format in `kind'.
+              bool want_float = false;
+              bool want_64_bit = false;
+              bool want_size_match = false;
+              bool want_zero = false;
+              switch (kind) {
+                case kFmtRegX:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegW:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegR:
+                  want_zero = true;
+                  break;
+                case kFmtRegXOrSp:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegWOrSp:
+                  want_size_match = true;
+                  break;
+                case kFmtRegROrSp:
+                  break;
+                case kFmtRegD:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegS:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegF:
+                  want_float = true;
+                  break;
+                default:
+                  LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name
+                             << " (" << kind << ")";
+                  break;
+              }
+
+              // Now check that the requirements are satisfied.
+              RegStorage reg(operand);
+              const char *expected = nullptr;
+              if (want_float) {
+                if (!reg.IsFloat()) {
+                  expected = "float register";
+                } else if (want_size_match && (reg.IsDouble() != want_64_bit)) {
+                  expected = (want_64_bit) ? "double register" : "single register";
+                }
+              } else {
+                if (reg.IsFloat()) {
+                  expected = "core register";
+                } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) {
+                  expected = (want_64_bit) ? "x-register" : "w-register";
+                } else if (reg.GetRegNum() == 31 && is_zero != want_zero) {
+                  expected = (want_zero) ? "zero-register" : "sp-register";
+                }
+              }
+
+              // TODO(Arm64): if !want_size_match, then we still should compare the size of the
+              //   register with the size required by the instruction width (kA64Wide).
+
+              // Fail if `expected' contains an unsatisfied requirement.
+              if (expected != nullptr) {
+                // TODO(Arm64): make this FATAL.
+                LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
+                             << ". Expected " << expected << ", got 0x" << std::hex << operand;
+              }
+            }
+
+            // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are
+            //   defined.
+            if (is_zero) {
+              operand = 31;
+            }
+          }
+
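+          // For example, a register field with field_loc = {kFmtRegR, 4, 0} and operand 3 yields
+          // (3 << 0) & 0x1f, i.e. the register number lands in bits [4:0] of the instruction.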
+          value = (operand << encoder->field_loc[i].start) &
+              ((1 << (encoder->field_loc[i].end + 1)) - 1);
+          bits |= value;
+        } else {
+          switch (kind) {
+            case kFmtSkip:
+              break;  // Nothing to do, but continue to next.
+            case kFmtUnused:
+              i = 4;  // Done, break out of the enclosing loop.
+              break;
+            case kFmtShift:
+              // Intentional fallthrough.
+            case kFmtExtend:
+              DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift);
+              value = (operand & 0x3f) << 10;
+              value |= ((operand & 0x1c0) >> 6) << 21;
+              bits |= value;
+              break;
+            case kFmtImm21:
+              value = (operand & 0x3) << 29;
+              value |= ((operand & 0x1ffffc) >> 2) << 5;
+              bits |= value;
+              break;
+            default:
+              LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
+                         << " (" << kind << ")";
+          }
+        }
+      }
+
+      DCHECK_EQ(encoder->size, 4);
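+      // Emit the 32-bit instruction in little-endian byte order.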
+      write_pos[0] = (bits & 0xff);
+      write_pos[1] = ((bits >> 8) & 0xff);
+      write_pos[2] = ((bits >> 16) & 0xff);
+      write_pos[3] = ((bits >> 24) & 0xff);
+      write_pos += 4;
+    }
+  }
+
+  return write_pos;
+}
+
+// Align data offset on 8 byte boundary: it will only contain double-word items, as word immediates
+// are better set directly from the code (they will require no more than 2 instructions).
+#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7)
+
+// Assemble the LIR into binary instruction format.
+void Arm64Mir2Lir::AssembleLIR() {
+  LIR* lir;
+  LIR* prev_lir;
+  cu_->NewTimingSplit("Assemble");
+  int assembler_retries = 0;
+  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
+  data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
+  int32_t offset_adjustment;
+  AssignDataOffsets();
+
+  /*
+   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0
+   * for non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop.
+   */
+  int generation = 0;
+  while (true) {
+    // TODO(Arm64): check whether passes and offset adjustments are really necessary.
+    //   Currently they aren't, as - in the fixups below - no LIR is ever inserted.
+    //   Things can be different if jump ranges above 1 MB need to be supported.
+    //   If they are not, then we can get rid of the assembler retry logic.
+
+    offset_adjustment = 0;
+    AssemblerStatus res = kSuccess;  // Assume success
+    generation ^= 1;
+    // Note: nodes requiring possible fixup are linked in ascending order.
+    lir = first_fixup_;
+    prev_lir = NULL;
+    while (lir != NULL) {
+      /*
+       * NOTE: the lir being considered here will be encoded following the switch (so long as
+       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
+       * due to retry must be explicitly encoded at the time of insertion.  Note that
+       * inserted instructions don't need use/def flags, but do need size and pc-rel status
+       * properly updated.
+       */
+      lir->offset += offset_adjustment;
+      // During the pass, this lets us tell whether a node has already been updated with offset_adjustment.
+      lir->flags.generation = generation;
+      switch (static_cast<FixupKind>(lir->flags.fixup)) {
+        case kFixupLabel:
+        case kFixupNone:
+        case kFixupVLoad:
+          break;
+        case kFixupT1Branch: {
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
+          }
+          lir->operands[0] = delta >> 2;
+          break;
+        }
+        case kFixupLoad:
+        case kFixupCBxZ:
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupLoad";
+          }
+          lir->operands[1] = delta >> 2;
+          break;
+        }
+        case kFixupAdr: {
+          LIR* target_lir = lir->target;
+          int32_t delta;
+          if (target_lir) {
+            CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ?
+                                      0 : offset_adjustment) + target_lir->offset;
+            delta = target_offs - lir->offset;
+          } else if (lir->operands[2] >= 0) {
+            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            delta = tab->offset + offset_adjustment - lir->offset;
+          } else {
+            // No fixup: this form is used to retrieve the current PC.
+            delta = lir->operands[1];
+          }
+          if (!IS_SIGNED_IMM21(delta)) {
+            LOG(FATAL) << "Jump range above 1MB in kFixupAdr";
+          }
+          lir->operands[1] = delta;
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
+      }
+      prev_lir = lir;
+      lir = lir->u.a.pcrel_next;
+    }
+
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      starting_offset += offset_adjustment;
+      data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
+      AssignDataOffsets();
+    }
+  }
+
+  // Build the CodeBuffer.
+  DCHECK_LE(data_offset_, total_size_);
+  code_buffer_.reserve(total_size_);
+  code_buffer_.resize(starting_offset);
+  uint8_t* write_pos = &code_buffer_[0];
+  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
+  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
+
+  DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size()));
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  cu_->NewTimingSplit("PcMappingTable");
+  CreateMappingTables();
+
+  cu_->NewTimingSplit("GcMap");
+  CreateNativeGcMap();
+}
+
+int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
+  ArmOpcode opcode = UNWIDE(lir->opcode);
+  DCHECK(!IsPseudoLirOp(opcode));
+  return EncodingMap[opcode].size;
+}
+
+// Encode instruction bit pattern and assign offsets.
+uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+  LIR* end_lir = tail_lir->next;
+
+  LIR* last_fixup = NULL;
+  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    ArmOpcode opcode = UNWIDE(lir->opcode);
+    if (!lir->flags.is_nop) {
+      if (lir->flags.fixup != kFixupNone) {
+        if (!IsPseudoLirOp(opcode)) {
+          lir->flags.size = EncodingMap[opcode].size;
+          lir->flags.fixup = EncodingMap[opcode].fixup;
+        } else {
+          DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4);
+          lir->flags.size = 0;
+          lir->flags.fixup = kFixupLabel;
+        }
+        // Link into the fixup chain.
+        lir->flags.use_def_invalid = true;
+        lir->u.a.pcrel_next = NULL;
+        if (first_fixup_ == NULL) {
+          first_fixup_ = lir;
+        } else {
+          last_fixup->u.a.pcrel_next = lir;
+        }
+        last_fixup = lir;
+        lir->offset = offset;
+      }
+      offset += lir->flags.size;
+    }
+  }
+  return offset;
+}
+
+void Arm64Mir2Lir::AssignDataOffsets() {
+  /* Set up offsets for literals */
+  CodeOffset offset = data_offset_;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  total_size_ = AssignFillArrayDataOffset(offset);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
new file mode 100644
index 0000000..c210816
--- /dev/null
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -0,0 +1,432 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Arm64 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+
+namespace art {
+
+bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
+                                  const InlineMethod& special) {
+  return Mir2Lir::GenSpecialCase(bb, mir, special);
+}
+
+/*
+ * The sparse table in the literal pool is an array of <key,displacement>
+ * pairs.  For each entry, we load the key and displacement together using ldp.
+ * The test loop will look something like:
+ *
+ *   adr   r_base, <table>
+ *   ldr   r_val, [rA64_SP, v_reg_off]
+ *   mov   r_idx, #table_size
+ * loop:
+ *   cbz   r_idx, quit
+ *   ldp   r_key, r_disp, [r_base], #8
+ *   sub   r_idx, #1
+ *   cmp   r_val, r_key
+ *   b.ne  loop
+ *   adr   r_base, #0        ; This is the instruction from which we compute displacements
+ *   add   r_base, r_disp
+ *   br    r_base
+ * quit:
+ */
+void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
+                                   RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpSparseSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage r_base = AllocTemp();
+  // Allocate key and disp temps.
+  RegStorage r_key = AllocTemp();
+  RegStorage r_disp = AllocTemp();
+  // Materialize a pointer to the switch table
+  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
+  // Set up r_idx
+  RegStorage r_idx = AllocTemp();
+  LoadConstant(r_idx, size);
+
+  // Entry of loop.
+  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
+  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);
+
+  // Load next key/disp.
+  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
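+  // Note: the post-index operand (2) is the raw imm7 field; ldp scales it by the 4-byte register
+  // size, which gives the "#8" shown in the pseudo-code above.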
+  OpRegRegImm(kOpSub, r_idx, r_idx, 1);
+
+  // Go to next case, if key does not match.
+  OpRegReg(kOpCmp, r_key, rl_src.reg);
+  OpCondBranch(kCondNe, loop_entry);
+
+  // Key does match: branch to case label.
+  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
+
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(),
+                   ENCODE_NO_SHIFT, true);
+  NewLIR1(kA64Br1x, r_base.GetReg());
+
+  // Loop exit label.
+  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
+  branch_out->target = loop_exit;
+}
+
+
+void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
+                                 RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpPackedSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets =
+      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage table_base = AllocTemp();
+  // Materialize a pointer to the switch table
+  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
+  int low_key = s4FromSwitchData(&table[2]);
+  RegStorage key_reg;
+  // Remove the bias, if necessary
+  if (low_key == 0) {
+    key_reg = rl_src.reg;
+  } else {
+    key_reg = AllocTemp();
+    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
+  }
+  // Bounds check - an unsigned compare against size - 1 catches both key < 0 and key >= size;
+  // if out of range, branch over the switch dispatch.
+  OpRegImm(kOpCmp, key_reg, size - 1);
+  LIR* branch_over = OpCondBranch(kCondHi, NULL);
+
+  // Load the displacement from the switch table
+  RegStorage disp_reg = AllocTemp();
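+  // Each table entry is a 4-byte displacement, hence the scale of 2 and the k32 load.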
+  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
+
+  // Get base branch address.
+  RegStorage branch_reg = AllocTemp();
+  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
+
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(),
+                   ENCODE_NO_SHIFT, true);
+  NewLIR1(kA64Br1x, branch_reg.GetReg());
+
+  // branch_over target here
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+}
+
+/*
+ * Array data table format:
+ *  ushort ident = 0x0300   magic value
+ *  ushort width            width of each element in the table
+ *  uint   size             number of elements in the table
+ *  ubyte  data[size*width] table of data values (may contain a single-byte
+ *                          padding at the end)
+ *
+ * Total size is 4+(width * size + 1)/2 16-bit code units.
+ */
+void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  // Add the table to the list - we'll process it later
+  FillArrayData *tab_rec =
+      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint16_t width = tab_rec->table[1];
+  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
+  tab_rec->size = (size * width) + 8;
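+  // For example, with width == 4 and size == 3 this reserves 3 * 4 + 8 == 20 bytes (the 8-byte
+  // header plus the payload).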
+
+  fill_array_data_.Insert(tab_rec);
+
+  // Making a call - use explicit registers
+  FlushAllRegs();   /* Everything to home location */
+  LoadValueDirectFixed(rl_src, rs_x0);
+  LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pHandleFillArrayData),
+               rs_rA64_LR);
+  // Materialize a pointer to the fill data image
+  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+}
+
+/*
+ * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc.
+ */
+void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  // FIXME: need separate LoadValues for object references.
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
+  LockCallTemps();  // Prepare for explicit register usage
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    LIR* null_check_branch = nullptr;
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+      }
+    }
+    Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
+    NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
+
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    not_unlocked_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artLockObjectFromCode(obj);
+    LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    lock_success_branch->target = success_target;
+    GenMemBarrier(kLoadLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rs_x0, opt_flags);
+    Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+    MarkPossibleNullPointerException(opt_flags);
+    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, rs_x1, 0);
+    OpIT(kCondEq, "");
+    NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, rs_x1, 0);
+    OpIT(kCondNe, "T");
+    // Go expensive route - artLockObjectFromCode(self, obj);
+    LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kLoadLoad);
+  }
+}
+
+/*
+ * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
+ * and can only give away ownership if it is suspended.
+ */
+void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
+  LockCallTemps();  // Prepare for explicit register usage
+  LIR* null_check_branch = nullptr;
+  Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+      }
+    }
+    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);
+    MarkPossibleNullPointerException(opt_flags);
+    LoadConstantNoClobber(rs_x3, 0);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
+    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
+    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    slow_unlock_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artUnlockObjectFromCode(obj);
+    LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    unlock_success_branch->target = success_target;
+    GenMemBarrier(kStoreLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rs_x0, opt_flags);
+    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);  // Get lock
+    MarkPossibleNullPointerException(opt_flags);
+    Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_x2);
+    LoadConstantNoClobber(rs_x3, 0);
+    // Is lock unheld on lock or held by us (==thread_id) on unlock?
+    OpRegReg(kOpCmp, rs_x1, rs_x2);
+    OpIT(kCondEq, "EE");
+    Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
+    // Go expensive route - UnlockObjectFromCode(obj);
+    LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
+    ClobberCallerSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kStoreLoad);
+  }
+}
+
+void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
+  int ex_offset = A64_THREAD_EXCEPTION_INT_OFFS;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage reset_reg = AllocTemp();
+  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
+  LoadConstant(reset_reg, 0);
+  Store32Disp(rs_rA64_SELF, ex_offset, reset_reg);
+  FreeTemp(reset_reg);
+  StoreValue(rl_dest, rl_result);
+}
+
+/*
+ * Mark garbage collection card. Skip if the value we're storing is null.
+ */
+void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+  RegStorage reg_card_base = AllocTemp();
+  RegStorage reg_card_no = AllocTemp();
+  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
+  LoadWordDisp(rs_rA64_SELF, A64_THREAD_CARD_TABLE_INT_OFFS, reg_card_base);
+  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
+  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
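+  // The byte written is the low byte of reg_card_base; the biased card table base is expected to
+  // be chosen so that this value marks the card as dirty.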
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+  FreeTemp(reg_card_base);
+  FreeTemp(reg_card_no);
+}
+
+void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  /*
+   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
+   * mechanism know so it doesn't try to use any of them when
+   * expanding the frame or flushing.  This leaves the utility
+   * code with a single temp: r12.  This should be enough.
+   */
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
+
+  /*
+   * We can safely skip the stack overflow check if we're
+   * a leaf *and* our frame size < fudge factor.
+   */
+  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
+                            (static_cast<size_t>(frame_size_) <
+                            Thread::kStackOverflowReservedBytes));
+  NewLIR0(kPseudoMethodEntry);
+
+  if (!skip_overflow_check) {
+    LoadWordDisp(rs_rA64_SELF, A64_THREAD_STACK_END_INT_OFFS, rs_x12);
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      /* Load stack limit */
+      // TODO(Arm64): fix the line below:
+      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-framesize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
+      Load32Disp(rs_rA64_SP, 0, rs_wzr);
+      MarkPossibleStackOverflowException();
+    }
+  } else if (frame_size_ > 0) {
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
+  }
+
+  /* Spill core callee saves */
+  if (core_spill_mask_) {
+    SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+  }
+  /* Need to spill any FP regs? */
+  if (num_fp_spills_) {
+    /*
+     * NOTE: fp spills are a little different from core spills in that
+     * they are pushed as a contiguous block.  When promoting from
+     * the fp set, we must allocate all singles from s16..highest-promoted
+     */
+    // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, core_spill_mask_);
+  }
+
+  FlushIns(ArgLocs, rl_method);
+
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
+}
+
+void Arm64Mir2Lir::GenExitSequence() {
+  /*
+   * In the exit path, x0/x1 are live - make sure they aren't
+   * allocated by the register utilities as temps.
+   */
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+
+  NewLIR0(kPseudoMethodExit);
+  /* Need to restore any FP callee saves? */
+  if (num_fp_spills_) {
+    // TODO(Arm64): UnspillFPRegs(num_fp_spills_);
+  }
+  if (core_spill_mask_) {
+    UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+  }
+
+  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
+  NewLIR0(kA64Ret);
+}
+
+void Arm64Mir2Lir::GenSpecialExitSequence() {
+  NewLIR0(kA64Ret);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
new file mode 100644
index 0000000..903be10
--- /dev/null
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+
+#include "arm64_lir.h"
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+class Arm64Mir2Lir : public Mir2Lir {
+  public:
+    Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+
+    // Required for target - codegen helpers.
+    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                            RegLocation rl_dest, int lit);
+    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    LIR* CheckSuspendUsingLoad() OVERRIDE;
+    RegStorage LoadHelper(A64ThreadOffset offset);
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
+    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
+                         OpSize size) OVERRIDE;
+    LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                             RegStorage r_dest, OpSize size) OVERRIDE;
+    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
+    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
+    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
+                          OpSize size) OVERRIDE;
+    LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                              RegStorage r_src, OpSize size) OVERRIDE;
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
+    // Required for target - register utilities.
+    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
+    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
+    RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage GetArgMappingToPhysicalReg(int arg_num);
+    RegLocation GetReturnAlt();
+    RegLocation GetReturnWideAlt();
+    RegLocation LocCReturn();
+    RegLocation LocCReturnDouble();
+    RegLocation LocCReturnFloat();
+    RegLocation LocCReturnWide();
+    uint64_t GetRegMaskCommon(RegStorage reg);
+    void AdjustSpillMask();
+    void ClobberCallerSave();
+    void FreeCallTemps();
+    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
+    void LockCallTemps();
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
+    void CompilerInitializeRegAlloc();
+    RegStorage AllocPreservedDouble(int s_reg);
+
+    // Required for target - miscellaneous.
+    void AssembleLIR();
+    uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
+    int AssignInsnOffsets();
+    void AssignOffsets();
+    uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
+    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    const char* GetTargetInstFmt(int opcode);
+    const char* GetTargetInstName(int opcode);
+    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+    uint64_t GetPCUseDefEncoding();
+    uint64_t GetTargetInstFlags(int opcode);
+    int GetInsnSize(LIR* lir);
+    bool IsUnconditionalBranch(LIR* lir);
+
+    // Required for target - Dalvik-level generators.
+    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2);
+    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                     RegLocation rl_index, RegLocation rl_dest, int scale);
+    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                     RegLocation rl_src, int scale, bool card_mark);
+    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_shift);
+    void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2);
+    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                         RegLocation rl_src2);
+    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
+    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedSqrt(CallInfo* info);
+    bool GenInlinedPeek(CallInfo* info, OpSize size);
+    bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                   RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
+    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenDivZeroCheckWide(RegStorage reg);
+    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
+    void GenExitSequence();
+    void GenSpecialExitSequence();
+    void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
+    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
+    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
+    void GenSelect(BasicBlock* bb, MIR* mir);
+    void GenMemBarrier(MemBarrierKind barrier_kind);
+    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+    void GenMonitorExit(int opt_flags, RegLocation rl_src);
+    void GenMoveException(RegLocation rl_dest);
+    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                       int first_bit, int second_bit);
+    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
+    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
+    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
+    uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
+    void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+
+    // Required for target - single operation generators.
+    LIR* OpUnconditionalBranch(LIR* target);
+    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
+    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
+    LIR* OpCondBranch(ConditionCode cc, LIR* target);
+    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
+    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpIT(ConditionCode cond, const char* guide);
+    void OpEndIT(LIR* it);
+    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
+    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    LIR* OpReg(OpKind op, RegStorage r_dest_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide);
+    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
+    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
+    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
+    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
+    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
+    LIR* OpTestSuspend(LIR* target);
+    LIR* OpThreadMem(OpKind op, A64ThreadOffset thread_offset);
+    LIR* OpVldm(RegStorage r_base, int count);
+    LIR* OpVstm(RegStorage r_base, int count);
+    void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+    void OpRegCopyWide(RegStorage dest, RegStorage src);
+    void OpTlsCmp(A64ThreadOffset offset, int val);
+
+    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
+    LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+    LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
+                          bool is_wide = false);
+    LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false);
+    static const ArmEncodingMap EncodingMap[kA64Last];
+    int EncodeShift(int code, int amount);
+    int EncodeExtend(int extend_type, int amount);
+    bool IsExtendEncoding(int encoded_value);
+    int EncodeLogicalImmediate(bool is_wide, uint64_t value);
+    uint64_t DecodeLogicalImmediate(bool is_wide, int value);
+
+    ArmConditionCode ArmConditionEncoding(ConditionCode code);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
+
+    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    int LoadArgRegs(CallInfo* info, int call_state,
+                    NextCallInsn next_call_insn,
+                    const MethodReference& target_method,
+                    uint32_t vtable_idx,
+                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                    bool skip_this);
+
+  private:
+    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
+                                  ConditionCode ccode);
+    LIR* LoadFPConstantValue(int r_dest, int32_t value);
+    LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
+    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                          bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
new file mode 100644
index 0000000..c2a550e
--- /dev/null
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  /*
+   * Don't attempt to optimize register usage since these opcodes call out to
+   * the handlers.
+   */
+  switch (opcode) {
+    case Instruction::ADD_FLOAT_2ADDR:
+    case Instruction::ADD_FLOAT:
+      op = kA64Fadd3fff;
+      break;
+    case Instruction::SUB_FLOAT_2ADDR:
+    case Instruction::SUB_FLOAT:
+      op = kA64Fsub3fff;
+      break;
+    case Instruction::DIV_FLOAT_2ADDR:
+    case Instruction::DIV_FLOAT:
+      op = kA64Fdiv3fff;
+      break;
+    case Instruction::MUL_FLOAT_2ADDR:
+    case Instruction::MUL_FLOAT:
+      op = kA64Fmul3fff;
+      break;
+    case Instruction::REM_FLOAT_2ADDR:
+    case Instruction::REM_FLOAT:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturn(true);
+      StoreValue(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_FLOAT:
+      GenNegFloat(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  rl_src1 = LoadValue(rl_src1, kFPReg);
+  rl_src2 = LoadValue(rl_src2, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
+                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::ADD_DOUBLE:
+      op = kA64Fadd3fff;
+      break;
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE:
+      op = kA64Fsub3fff;
+      break;
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE:
+      op = kA64Fdiv3fff;
+      break;
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE:
+      op = kA64Fmul3fff;
+      break;
+    case Instruction::REM_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturnWide(true);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_DOUBLE:
+      GenNegDouble(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kFPReg);
+  DCHECK(rl_src1.wide);
+  rl_src2 = LoadValueWide(rl_src2, kFPReg);
+  DCHECK(rl_src2.wide);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_result.wide);
+  NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
+                                 RegLocation rl_dest, RegLocation rl_src) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::INT_TO_FLOAT:
+      op = kA64Scvtf2fw;
+      break;
+    case Instruction::FLOAT_TO_INT:
+      op = kA64Fcvtzs2wf;
+      break;
+    case Instruction::DOUBLE_TO_FLOAT:
+      op = kA64Fcvt2sS;
+      break;
+    case Instruction::FLOAT_TO_DOUBLE:
+      op = kA64Fcvt2Ss;
+      break;
+    case Instruction::INT_TO_DOUBLE:
+      op = FWIDE(kA64Scvtf2fw);
+      break;
+    case Instruction::DOUBLE_TO_INT:
+      op = FWIDE(kA64Fcvtzs2wf);
+      break;
+    case Instruction::LONG_TO_DOUBLE:
+      op = FWIDE(kA64Scvtf2fx);
+      break;
+    case Instruction::FLOAT_TO_LONG:
+      op = kA64Fcvtzs2xf;
+      break;
+    case Instruction::LONG_TO_FLOAT:
+      op = kA64Scvtf2fx;
+      break;
+    case Instruction::DOUBLE_TO_LONG:
+      op = FWIDE(kA64Fcvtzs2xf);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+
+  if (rl_src.wide) {
+    rl_src = LoadValueWide(rl_src, kFPReg);
+  } else {
+    rl_src = LoadValue(rl_src, kFPReg);
+  }
+
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+
+  if (rl_dest.wide) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
+                                     bool is_double) {
+  LIR* target = &block_label_list_[bb->taken];
+  RegLocation rl_src1;
+  RegLocation rl_src2;
+  if (is_double) {
+    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = mir_graph_->GetSrc(mir, 0);
+    rl_src2 = mir_graph_->GetSrc(mir, 1);
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  ConditionCode ccode = mir->meta.ccode;
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      break;
+    case kCondLt:
+      if (gt_bias) {
+        ccode = kCondMi;
+      }
+      break;
+    case kCondLe:
+      if (gt_bias) {
+        ccode = kCondLs;
+      }
+      break;
+    case kCondGt:
+      if (gt_bias) {
+        ccode = kCondHi;
+      }
+      break;
+    case kCondGe:
+      if (gt_bias) {
+        ccode = kCondUge;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCondBranch(ccode, target);
+}
+
+
+void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  bool is_double = false;
+  int default_result = -1;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::CMPL_FLOAT:
+      is_double = false;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_FLOAT:
+      is_double = false;
+      default_result = 1;
+      break;
+    case Instruction::CMPL_DOUBLE:
+      is_double = true;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_DOUBLE:
+      is_double = true;
+      default_result = 1;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  if (is_double) {
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  DCHECK(!rl_result.reg.IsFloat());
+
+  // TODO(Arm64): should we rather do this?
+  // csinc wD, wzr, wzr, eq
+  // csneg wD, wD, wD, le
+  // (which requires 2 instructions rather than 3)
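+  //
+  // For reference, the A64 conditional-select semantics used here and below are:
+  //   csel  Wd, Wn, Wm, cond  ->  Wd = cond ? Wn : Wm
+  //   csinc Wd, Wn, Wm, cond  ->  Wd = cond ? Wn : Wm + 1
+  //   csneg Wd, Wn, Wm, cond  ->  Wd = cond ? Wn : -Wm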
+
+  // Rd = if cond then Rd else -Rd.
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
+  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
+          kArmCondEq);
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValue(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64";
+
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  LIR *branch;
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  branch = NewLIR2(kA64B2ct, kArmCondEq, 0);
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage r_tgt = LoadHelper(A64_QUICK_ENTRYPOINT_OFFSET(pSqrt));
+  // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg());
+  NewLIR1(kA64Blr1x, r_tgt.GetReg());
+  // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1);
+  branch->target = NewLIR0(kPseudoTargetLabel);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
new file mode 100644
index 0000000..709f583
--- /dev/null
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Arm64 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/array.h"
+
+namespace art {
+
+LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
+  OpRegReg(kOpCmp, src1, src2);
+  return OpCondBranch(cond, target);
+}
+
+// TODO(Arm64): remove this.
+LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
+  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
+  return NULL;
+}
+
+void Arm64Mir2Lir::OpEndIT(LIR* it) {
+  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
+}
+
+/*
+ * 64-bit 3-way compare function.
+ *     cmp   xA, xB
+ *     csinc wC, wzr, wzr, eq
+ *     csneg wC, wC, wC, le
+ */
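+// Note: csinc selects 0 (wzr) when the operands compare equal and 1 otherwise; csneg then
+// conditionally negates that value based on the signed comparison, so wC ends up in {-1, 0, +1}.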
+void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), kArmCondLe);
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                            int64_t val, ConditionCode ccode) {
+  LIR* taken = &block_label_list_[bb->taken];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+
+  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
+    ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
+    branch->target = taken;
+  } else {
+    OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true);
+    OpCondBranch(ccode, taken);
+  }
+}
+
+void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  ConditionCode ccode = mir->meta.ccode;
+  if (mir->ssa_rep->num_uses == 1) {
+    // CONST case
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    // Change kCondNe to kCondEq for the special cases below.
+    if (ccode == kCondNe) {
+      ccode = kCondEq;
+      std::swap(true_val, false_val);
+    }
+    bool cheap_false_val = InexpensiveConstantInt(false_val);
+    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
+      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
+      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      OpIT(kCondLs, "");
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      OpIT(ccode, "E");
+      LoadConstant(rl_result.reg, true_val);
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else {
+      // Unlikely case - could be tuned.
+      RegStorage t_reg1 = AllocTemp();
+      RegStorage t_reg2 = AllocTemp();
+      LoadConstant(t_reg1, true_val);
+      LoadConstant(t_reg2, false_val);
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, t_reg1);
+      OpRegCopy(rl_result.reg, t_reg2);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    }
+  } else {
+    // MOVE case
+    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegImm(kOpCmp, rl_src.reg, 0);
+    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
+      OpIT(NegateComparison(ccode), "");
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
+      OpIT(ccode, "");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+    } else {  // Normal - select between the two.
+      OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    }
+    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+  // Normalize such that if either operand is constant, src2 will be constant.
+  ConditionCode ccode = mir->meta.ccode;
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    // Do special compare/branch against simple const operand if not already in registers.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if ((rl_temp.location != kLocPhysReg)
+     /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) {
+      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
+      return;
+    }
+  }
+  LIR* taken = &block_label_list_[bb->taken];
+  LIR* not_taken = &block_label_list_[bb->fall_through];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
+  switch (ccode) {
+    case kCondEq:
+      OpCondBranch(kCondNe, not_taken);
+      break;
+    case kCondNe:
+      OpCondBranch(kCondNe, taken);
+      break;
+    case kCondLt:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondUlt;
+      break;
+    case kCondLe:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondUge;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
+  OpCondBranch(ccode, taken);
+}
+
+/*
+ * Generate a register comparison to an immediate and branch.  Caller
+ * is responsible for setting branch target field.
+ */
+LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
+                                  LIR* target) {
+  LIR* branch;
+  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
+  if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
+    ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    branch = NewLIR2(opcode, reg.GetReg(), 0);
+  } else {
+    OpRegImm(kOpCmp, reg, check_value);
+    branch = NewLIR2(kA64B2ct, arm_cond, 0);
+  }
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
+  bool dest_is_fp = r_dest.IsFloat();
+  bool src_is_fp = r_src.IsFloat();
+  ArmOpcode opcode = kA64Brk1d;
+  LIR* res;
+
+  if (LIKELY(dest_is_fp == src_is_fp)) {
+    if (LIKELY(!dest_is_fp)) {
+      // Core/core copy.
+      // Copies involving the sp register require a different instruction.
+      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
+
+      // TODO(Arm64): kA64Add4RRdT formally has 4 arguments, but is used here as a 2-argument
+      //   instruction. This currently works because the other arguments default to 0. We
+      //   should instead introduce an alias kA64Mov2RR.
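+      //   (In A64, a "mov" to or from sp is itself an alias of "add Xd, Xn, #0", which is
+      //   why the add form is used for sp here.)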
+
+      // Core/core copy. Do an x/x copy only if both registers are x.
+      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
+        opcode = WIDE(opcode);
+      }
+    } else {
+      // Float/float copy.
+      bool dest_is_double = r_dest.IsDouble();
+      bool src_is_double = r_src.IsDouble();
+
+      // We do not do float/double or double/float casts here.
+      DCHECK_EQ(dest_is_double, src_is_double);
+
+      // Homogeneous float/float copy.
+      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
+    }
+  } else {
+    // Inhomogeneous register copy.
+    if (dest_is_fp) {
+      if (r_dest.IsDouble()) {
+        opcode = kA64Fmov2Sx;
+      } else {
+        DCHECK(r_src.IsSingle());
+        opcode = kA64Fmov2sw;
+      }
+    } else {
+      if (r_src.IsDouble()) {
+        opcode = kA64Fmov2xS;
+      } else {
+        DCHECK(r_dest.Is32Bit());
+        opcode = kA64Fmov2ws;
+      }
+    }
+  }
+
+  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+
+  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
+    res->flags.is_nop = true;
+  }
+
+  return res;
+}
+
+void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
+}
+
+void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
+  OpRegCopy(r_dest, r_src);
+}
+
+// Table of magic divisors
+struct MagicTable {
+  uint32_t magic;
+  uint32_t shift;
+  DividePattern pattern;
+};
+
+static const MagicTable magic_table[] = {
+  {0, 0, DivideNone},        // 0
+  {0, 0, DivideNone},        // 1
+  {0, 0, DivideNone},        // 2
+  {0x55555556, 0, Divide3},  // 3
+  {0, 0, DivideNone},        // 4
+  {0x66666667, 1, Divide5},  // 5
+  {0x2AAAAAAB, 0, Divide3},  // 6
+  {0x92492493, 2, Divide7},  // 7
+  {0, 0, DivideNone},        // 8
+  {0x38E38E39, 1, Divide5},  // 9
+  {0x66666667, 2, Divide5},  // 10
+  {0x2E8BA2E9, 1, Divide5},  // 11
+  {0x2AAAAAAB, 1, Divide5},  // 12
+  {0x4EC4EC4F, 2, Divide5},  // 13
+  {0x92492493, 3, Divide7},  // 14
+  {0x88888889, 3, Divide7},  // 15
+};
+
+// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
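+// For example, dividing by 3 uses the magic constant 0x55555556 (= ceil(2^32 / 3)): the high
+// 32 bits of the signed 64-bit product x * 0x55555556 give x / 3 (plus a sign correction for
+// negative x). E.g. for x = 9 the product is 0x300000006, whose high word is 3.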
+bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
+  // TODO(Arm64): fix this for Arm64. Note: it may be worth revisiting the magic table.
+  //   It should be possible to subtract one from all of its entries, and to use smaddl
+  //   to counteract this. The advantage is that the integers would then be easier to
+  //   encode as logical immediates (0x55555555 rather than 0x55555556).
+  UNIMPLEMENTED(FATAL);
+
+  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
+    return false;
+  }
+  DividePattern pattern = magic_table[lit].pattern;
+  if (pattern == DivideNone) {
+    return false;
+  }
+  // Tuning: add rem patterns
+  if (!is_div) {
+    return false;
+  }
+
+  RegStorage r_magic = AllocTemp();
+  LoadConstant(r_magic, magic_table[lit].magic);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage r_hi = AllocTemp();
+  RegStorage r_lo = AllocTemp();
+  NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+  switch (pattern) {
+    case Divide3:
+      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
+               rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31));
+      break;
+    case Divide5:
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+               EncodeShift(kA64Asr, magic_table[lit].shift));
+      break;
+    case Divide7:
+      OpRegReg(kOpAdd, r_hi, rl_src.reg);
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+               EncodeShift(kA64Asr, magic_table[lit].shift));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected pattern: " << pattern;
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
+  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
+  return false;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  // Put the literal in a temp.
+  RegStorage lit_temp = AllocTemp();
+  LoadConstant(lit_temp, lit);
+  // Use the generic case for div/rem with arg2 in a register.
+  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
+  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
+  FreeTemp(lit_temp);
+
+  return rl_result;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
+                                  bool is_div) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (is_div) {
+    // Simple case, use sdiv instruction.
+    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
+  } else {
+    // Remainder case, use the following code:
+    // temp = reg1 / reg2      - integer division
+    // temp = temp * reg2
+    // dest = reg1 - temp
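+    // For example, 7 % 3: temp = 7 / 3 = 2, temp = 2 * 3 = 6, dest = 7 - 6 = 1.
+    // (The multiply and subtract could also be fused into a single A64 msub.)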
+
+    RegStorage temp = AllocTemp();
+    OpRegRegReg(kOpDiv, temp, reg1, reg2);
+    OpRegReg(kOpMul, temp, reg2);
+    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
+    FreeTemp(temp);
+  }
+
+  return rl_result;
+}
+
+bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  RegLocation rl_src1 = info->args[0];
+  RegLocation rl_src2 = info->args[1];
+  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  OpIT((is_min) ? kCondGt : kCondLt, "E");
+  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
+  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
+  GenBarrier();
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (size == k64) {
+    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
+    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+    } else {
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+    }
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_src_value = info->args[2];  // [size] value
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  if (size == k64) {
+    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
+    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
+  }
+  return true;
+}
+
+void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
+  LOG(FATAL) << "Unexpected use of OpLea for Arm64";
+}
+
+void Arm64Mir2Lir::OpTlsCmp(A64ThreadOffset offset, int val) {
+  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64";
+}
+
+bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  // Unused - RegLocation rl_src_unsafe = info->args[0];
+  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
+  RegLocation rl_src_offset = info->args[2];  // long low
+  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  RegLocation rl_src_expected = info->args[4];  // int, long or Object
+  // If is_long, high half is in info->args[5]
+  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
+  // If is_long, high half is in info->args[7]
+  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
+
+  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
+  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
+  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
+  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
+  // into the same temps, reducing the number of required temps down to 5. We shall work
+  // around the potentially locked temp by using LR for r_ptr, unconditionally.
+  // TODO: Pass information about the need for more temps to the stack frame generation
+  // code so that we can rely on being able to allocate enough temps.
+  DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
+  MarkTemp(rs_rA64_LR);
+  FreeTemp(rs_rA64_LR);
+  LockTemp(rs_rA64_LR);
+  bool load_early = true;
+  if (is_long) {
+    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
+        rl_src_expected.reg;
+    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
+        rl_src_new_value.reg;
+    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
+    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
+    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
+    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
+
+    if (!expected_is_good_reg && !new_value_is_good_reg) {
+      // None of expected/new_value is non-temp reg, need to load both late
+      load_early = false;
+      // Make sure they are not in the temp regs and the load will not be skipped.
+      if (expected_is_core_reg) {
+        FlushRegWide(rl_src_expected.reg);
+        ClobberSReg(rl_src_expected.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
+        rl_src_expected.location = kLocDalvikFrame;
+      }
+      if (new_value_is_core_reg) {
+        FlushRegWide(rl_src_new_value.reg);
+        ClobberSReg(rl_src_new_value.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
+        rl_src_new_value.location = kLocDalvikFrame;
+      }
+    }
+  }
+
+  // Release store semantics, get the barrier out of the way.  TODO: revisit
+  GenMemBarrier(kStoreLoad);
+
+  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_new_value;
+  if (!is_long) {
+    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+  } else if (load_early) {
+    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+  }
+
+  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
+    // Mark card for object assuming new value is stored.
+    MarkGCCard(rl_new_value.reg, rl_object.reg);
+  }
+
+  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+
+  RegStorage r_ptr = rs_rA64_LR;
+  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
+
+  // Free now unneeded rl_object and rl_offset to give more temps.
+  ClobberSReg(rl_object.s_reg_low);
+  FreeTemp(rl_object.reg);
+  ClobberSReg(rl_offset.s_reg_low);
+  FreeTemp(rl_offset.reg);
+
+  RegLocation rl_expected;
+  if (!is_long) {
+    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+  } else if (load_early) {
+    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
+  } else {
+    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
+    int low_reg = AllocTemp().GetReg();
+    int high_reg = AllocTemp().GetReg();
+    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
+    rl_expected = rl_new_value;
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = tmp != 0;
+
+  RegStorage r_tmp = AllocTemp();
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+
+  if (is_long) {
+    RegStorage r_tmp_high = AllocTemp();
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
+    }
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
+    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
+    }
+
+    LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
+    LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
+    NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
+            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
+    LIR* target2 = NewLIR0(kPseudoTargetLabel);
+    branch1->target = target2;
+    branch2->target = target2;
+    FreeTemp(r_tmp_high);  // Now unneeded
+
+  } else {
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    OpIT(kCondEq, "T");
+    NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+  }
+
+  // Still one conditional left from OpIT(kCondEq, "T") from either branch
+  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
+  OpCondBranch(kCondEq, target);
+
+  if (!load_early) {
+    FreeTemp(rl_expected.reg);  // Now unneeded.
+  }
+
+  // result := (tmp1 != 0) ? 0 : 1;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  OpIT(kCondUlt, "");
+  LoadConstant(rl_result.reg, 0); /* cc */
+  FreeTemp(r_tmp);  // Now unneeded.
+
+  StoreValue(rl_dest, rl_result);
+
+  // Now, restore lr to its non-temp status.
+  Clobber(rs_rA64_LR);
+  UnmarkTemp(rs_rA64_LR);
+  return true;
+}
+
+LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
+}
+
+LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
+  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
+  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
+  return NULL;
+}
+
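+// The code below computes rl_src * (2^first_bit + 2^second_bit), i.e. multiplication by a
+// literal with exactly two bits set. For example, with lit = 10 (first_bit = 1, second_bit = 3):
+// x + (x << 2) = 5*x, then (5*x) << 1 = 10*x.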
+void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
+                                               RegLocation rl_result, int lit,
+                                               int first_bit, int second_bit) {
+  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
+                   EncodeShift(kA64Lsl, second_bit - first_bit));
+  if (first_bit != 0) {
+    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
+  }
+}
+
+void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
+  DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
+  OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true);
+  GenDivZeroCheck(kCondEq);
+}
+
+// TODO(Arm64): the function below should go.
+// Test suspend flag, return target of taken suspend branch
+LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
+  NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
+  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+}
+
+// Decrement register and branch on condition
+LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
+  // Combine sub & test using sub setflags encoding here
+  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  return OpCondBranch(c_code, target);
+}
+
+void Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+#if ANDROID_SMP != 0
+  // Start off by using the last LIR as the barrier. If it is not sufficient, we will generate one.
+  LIR* barrier = last_lir_insn_;
+
+  int dmb_flavor;
+  // TODO: revisit Arm barrier kinds
+  switch (barrier_kind) {
+    case kLoadStore: dmb_flavor = kISH; break;
+    case kLoadLoad: dmb_flavor = kISH; break;
+    case kStoreStore: dmb_flavor = kISHST; break;
+    case kStoreLoad: dmb_flavor = kISH; break;
+    default:
+      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
+      dmb_flavor = kSY;  // quiet gcc.
+      break;
+  }
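+  // (On A64, "dmb ish" orders all memory accesses within the inner shareable domain,
+  // "dmb ishst" orders only store-to-store, and "dmb sy" is the full-system variant.)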
+
+  // If the same barrier already exists, don't generate another.
+  if (barrier == nullptr
+      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
+    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
+  }
+
+  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
+#endif
+}
+
+void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage z_reg = AllocTemp();
+  LoadConstantNoClobber(z_reg, 0);
+  // Check for destructive overlap
+  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+    RegStorage t_reg = AllocTemp();
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
+    FreeTemp(t_reg);
+  } else {
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
+  }
+  FreeTemp(z_reg);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
+                             RegLocation rl_src2) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(),
+                   ENCODE_NO_SHIFT, /*is_wide*/ true);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                              RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2) {
+  GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                           RegLocation rl_src2) {
+  GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
+}
+
+/*
+ * Generate array load
+ */
+void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  bool constant_index = rl_index.is_const;
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  if (rl_dest.wide) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  if (rl_dest.wide || rl_dest.fp || constant_index) {
+    RegStorage reg_ptr;
+    if (constant_index) {
+      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
+    } else {
+      // No special indexed operation, lea + load w/ displacement
+      reg_ptr = AllocTemp();
+      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+                       EncodeShift(kA64Lsl, scale));
+      FreeTemp(rl_index.reg);
+    }
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+    if (!constant_index) {
+      FreeTemp(reg_ptr);
+    }
+    if (rl_dest.wide) {
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      StoreValue(rl_dest, rl_result);
+    }
+  } else {
+    // Offset base, then use indexed load
+    RegStorage reg_ptr = AllocTemp();
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    FreeTemp(rl_array.reg);
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+    FreeTemp(reg_ptr);
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store
+ */
+void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  bool constant_index = rl_index.is_const;
+
+  int data_offset;
+  if (size == k64 || size == kDouble) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset.
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  RegStorage reg_ptr;
+  bool allocated_reg_ptr_temp = false;
+  if (constant_index) {
+    reg_ptr = rl_array.reg;
+  } else if (IsTemp(rl_array.reg) && !card_mark) {
+    Clobber(rl_array.reg);
+    reg_ptr = rl_array.reg;
+  } else {
+    allocated_reg_ptr_temp = true;
+    reg_ptr = AllocTemp();
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    // NOTE: max live temps(4) here.
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  /* at this point, reg_ptr points to array, 2 live temps */
+  if (rl_src.wide || rl_src.fp || constant_index) {
+    if (rl_src.wide) {
+      rl_src = LoadValueWide(rl_src, reg_class);
+    } else {
+      rl_src = LoadValue(rl_src, reg_class);
+    }
+    if (!constant_index) {
+      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+                       EncodeShift(kA64Lsl, scale));
+    }
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+
+    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    /* reg_ptr -> array data */
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    rl_src = LoadValue(rl_src, reg_class);
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+  }
+  if (allocated_reg_ptr_temp) {
+    FreeTemp(reg_ptr);
+  }
+  if (card_mark) {
+    MarkGCCard(rl_src.reg, rl_array.reg);
+  }
+}
+
+
+void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  // Per spec, we only care about low 6 bits of shift amount.
+  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  }
+  if (BadOverlap(rl_src, rl_dest)) {
+    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+    return;
+  }
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      if (shift_amount == 1) {
+        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
+        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
+      } else if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
+        LoadConstant(rl_result.reg.GetLow(), 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
+        LoadConstant(rl_result.reg.GetLow(), 0);
+      } else {
+        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(),
+                         EncodeShift(kA64Lsr, 32 - shift_amount));
+        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+                         EncodeShift(kA64Lsl, 32 - shift_amount));
+        FreeTemp(t_reg);
+        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+        LoadConstant(rl_result.reg.GetHigh(), 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
+        LoadConstant(rl_result.reg.GetHigh(), 0);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+                         EncodeShift(kA64Lsl, 32 - shift_amount));
+        FreeTemp(t_reg);
+        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                     RegLocation rl_src1, RegLocation rl_src2) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+    if (!rl_src2.is_const) {
+      // Don't bother with special handling for subtract from immediate.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      return;
+    }
+  } else {
+    // Normalize
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      std::swap(rl_src1, rl_src2);
+    }
+  }
+  if (BadOverlap(rl_src1, rl_dest)) {
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    return;
+  }
+  DCHECK(rl_src2.is_const);
+  // TODO(Arm64): implement this.
+  //  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t mod_imm_lo = -1;  // ModifiedImmediate(val_lo);
+  int32_t mod_imm_hi = -1;  // ModifiedImmediate(val_hi);
+
+  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
+        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+        return;
+      }
+      break;
+    default:
+      break;
+  }
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
+  switch (opcode) {
+#if 0
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
+      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
+      break;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
+        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      }
+      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
+        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      }
+      break;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      break;
+    case Instruction::AND_LONG:
+    case Instruction::AND_LONG_2ADDR:
+      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
+        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
+      }
+      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
+        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      }
+      break;
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
+      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
+      break;
+#endif
+    default:
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+/**
+ * @brief Split a register list into pairs of registers or single registers.
+ *
+ * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
+ * @code
+ *   int reg1 = -1, reg2 = -1;
+ *   while (reg_mask) {
+ *     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ *     if (UNLIKELY(reg2 < 0)) {
+ *       // Single register in reg1.
+ *     } else {
+ *       // Pair in reg1, reg2.
+ *     }
+ *   }
+ * @endcode
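+ *
+ * Note that @p reg1 carries state between calls and must start at -1. For example, with
+ * reg_mask = 0b1011 (registers 0, 1 and 3), the first call returns the pair reg1 = 1,
+ * reg2 = 0, and the second call returns the single register reg1 = 3.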
+ */
+uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
+  // Find first register.
+  int first_bit_set = __builtin_ctz(reg_mask) + 1;
+  int reg = *reg1 + first_bit_set;
+  reg_mask >>= first_bit_set;
+
+  if (LIKELY(reg_mask)) {
+    // Save the first register, find the second and use the pair opcode.
+    int second_bit_set = __builtin_ctz(reg_mask) + 1;
+    *reg2 = reg;
+    reg_mask >>= second_bit_set;
+    *reg1 = reg + second_bit_set;
+    return reg_mask;
+  }
+
+  // Use the single opcode, as we just have one register.
+  *reg1 = reg;
+  *reg2 = -1;
+  return reg_mask;
+}
+
+void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int pop_log2_size = 3;
+
+  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int pop_log2_size = 3;
+
+  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
new file mode 100644
index 0000000..7e07e15
--- /dev/null
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -0,0 +1,940 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "codegen_arm64.h"
+
+#include <inttypes.h>
+
+#include <string>
+
+#include "dex/compiler_internals.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+// TODO: rework this when C++11 support allows.
+static const RegStorage core_regs_arr[] =
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
+     rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
+     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31};
+static const RegStorage sp_regs_arr[] =
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
+     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
+static const RegStorage dp_regs_arr[] =
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15};
+static const RegStorage reserved_regs_arr[] =
+    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
+static const RegStorage core_temps_arr[] =
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12};
+static const RegStorage sp_temps_arr[] =
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15};
+static const RegStorage dp_temps_arr[] =
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7};
+
+static const std::vector<RegStorage> core_regs(core_regs_arr,
+    core_regs_arr + arraysize(core_regs_arr));
+static const std::vector<RegStorage> sp_regs(sp_regs_arr,
+    sp_regs_arr + arraysize(sp_regs_arr));
+static const std::vector<RegStorage> dp_regs(dp_regs_arr,
+    dp_regs_arr + arraysize(dp_regs_arr));
+static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
+    reserved_regs_arr + arraysize(reserved_regs_arr));
+static const std::vector<RegStorage> core_temps(core_temps_arr,
+    core_temps_arr + arraysize(core_temps_arr));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
+
+RegLocation Arm64Mir2Lir::LocCReturn() {
+  return arm_loc_c_return;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnWide() {
+  return arm_loc_c_return_wide;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnFloat() {
+  return arm_loc_c_return_float;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnDouble() {
+  return arm_loc_c_return_double;
+}
+
+// Return a target-dependent special register.
+RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+  // TODO(Arm64): this function doesn't work for hard-float ABI.
+  RegStorage res_reg = RegStorage::InvalidReg();
+  switch (reg) {
+    case kSelf: res_reg = rs_rA64_SELF; break;
+    case kSuspend: res_reg = rs_rA64_SUSPEND; break;
+    case kLr: res_reg =  rs_rA64_LR; break;
+    case kPc: res_reg = RegStorage::InvalidReg(); break;
+    case kSp: res_reg =  rs_rA64_SP; break;
+    case kArg0: res_reg = rs_x0; break;
+    case kArg1: res_reg = rs_x1; break;
+    case kArg2: res_reg = rs_x2; break;
+    case kArg3: res_reg = rs_x3; break;
+    case kFArg0: res_reg = rs_f0; break;
+    case kFArg1: res_reg = rs_f1; break;
+    case kFArg2: res_reg = rs_f2; break;
+    case kFArg3: res_reg = rs_f3; break;
+    case kRet0: res_reg = rs_x0; break;
+    case kRet1: res_reg = rs_x0; break;
+    case kInvokeTgt: res_reg = rs_rA64_LR; break;
+    case kHiddenArg: res_reg = rs_x12; break;
+    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
+    case kCount: res_reg = RegStorage::InvalidReg(); break;
+  }
+  return res_reg;
+}
+
+RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  return RegStorage::InvalidReg();
+}
+
+/*
+ * Decode the register id. This routine makes assumptions about the encoding made by RegStorage.
+ */
+uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
+  // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
+
+  int reg_raw = reg.GetRawBits();
+  // Check for the zero register (wzr/xzr) first.
+  if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
+    // The zero register is not a true register. It is just an immediate zero.
+    return 0;
+  }
+
+  return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask);
+}
+
+uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
+  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
+  return 0ULL;
+}
+
+// Arm64-specific setup.  TODO: inline?
+void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  DCHECK(!lir->flags.use_def_invalid);
+
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
+  }
+}
+
+ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) {
+  ArmConditionCode res;
+  switch (ccode) {
+    case kCondEq: res = kArmCondEq; break;
+    case kCondNe: res = kArmCondNe; break;
+    case kCondCs: res = kArmCondCs; break;
+    case kCondCc: res = kArmCondCc; break;
+    case kCondUlt: res = kArmCondCc; break;
+    case kCondUge: res = kArmCondCs; break;
+    case kCondMi: res = kArmCondMi; break;
+    case kCondPl: res = kArmCondPl; break;
+    case kCondVs: res = kArmCondVs; break;
+    case kCondVc: res = kArmCondVc; break;
+    case kCondHi: res = kArmCondHi; break;
+    case kCondLs: res = kArmCondLs; break;
+    case kCondGe: res = kArmCondGe; break;
+    case kCondLt: res = kArmCondLt; break;
+    case kCondGt: res = kArmCondGt; break;
+    case kCondLe: res = kArmCondLe; break;
+    case kCondAl: res = kArmCondAl; break;
+    case kCondNv: res = kArmCondNv; break;
+    default:
+      LOG(FATAL) << "Bad condition code " << ccode;
+      res = static_cast<ArmConditionCode>(0);  // Quiet gcc
+  }
+  return res;
+}
+
+static const char *shift_names[4] = {
+  "lsl",
+  "lsr",
+  "asr",
+  "ror"
+};
+
+static const char* extend_names[8] = {
+  "uxtb",
+  "uxth",
+  "uxtw",
+  "uxtx",
+  "sxtb",
+  "sxth",
+  "sxtw",
+  "sxtx",
+};
+
+/* Decode and print a register extension (e.g. ", uxtb #1") */
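+/*
+ * The operand mirrors EncodeShift()/EncodeExtend() in utility_arm64.cc: bit 6 set
+ * selects an extend (type in bits [5:3], amount in bits [2:0]), while bit 6 clear
+ * selects a shift (type in bits [8:7], amount in bits [5:0]).
+ */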
+static void DecodeRegExtendOrShift(int operand, char *buf, size_t buf_size) {
+  if ((operand & (1 << 6)) == 0) {
+    const char *shift_name = shift_names[(operand >> 7) & 0x3];
+    int amount = operand & 0x3f;
+    snprintf(buf, buf_size, ", %s #%d", shift_name, amount);
+  } else {
+    const char *extend_name = extend_names[(operand >> 3) & 0x7];
+    int amount = operand & 0x7;
+    if (amount == 0) {
+      snprintf(buf, buf_size, ", %s", extend_name);
+    } else {
+      snprintf(buf, buf_size, ", %s #%d", extend_name, amount);
+    }
+  }
+}
+
+#define BIT_MASK(w) ((UINT64_C(1) << (w)) - UINT64_C(1))
+
+static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) {
+  DCHECK_LE(width, 64U);
+  rotate &= 63;
+  value = value & BIT_MASK(width);
+  return ((value & BIT_MASK(rotate)) << (width - rotate)) | (value >> rotate);
+}
+
+static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) {
+  unsigned i;
+  unsigned reg_size = (is_wide) ? 64 : 32;
+  uint64_t result = value & BIT_MASK(width);
+  DCHECK_NE(width, reg_size);
+  for (i = width; i < reg_size; i *= 2) {
+    result |= (result << i);
+  }
+  DCHECK_EQ(i, reg_size);
+  return result;
+}
+
+/**
+ * @brief Decode an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate.
+ * @param value The encoded logical immediate that is to be decoded.
+ * @return The decoded logical immediate.
+ * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate().
+ */
+uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) {
+  unsigned n     = (value >> 12) & 0x01;
+  unsigned imm_r = (value >>  6) & 0x3f;
+  unsigned imm_s = (value >>  0) & 0x3f;
+
+  // An integer is constructed from the n, imm_s and imm_r bits according to
+  // the following table:
+  //
+  // N   imms immr  size S             R
+  // 1 ssssss rrrrrr 64  UInt(ssssss) UInt(rrrrrr)
+  // 0 0sssss xrrrrr 32  UInt(sssss)  UInt(rrrrr)
+  // 0 10ssss xxrrrr 16  UInt(ssss)   UInt(rrrr)
+  // 0 110sss xxxrrr 8   UInt(sss)    UInt(rrr)
+  // 0 1110ss xxxxrr 4   UInt(ss)     UInt(rr)
+  // 0 11110s xxxxxr 2   UInt(s)      UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
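+  //
+  // For example (worked from the table above): n=0, imm_s=0b110001, imm_r=0
+  // selects the 8-bit row with S=1, giving the pattern 0b00000011; with no
+  // rotation and is_wide false this decodes to 0x03030303.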
+
+  if (n == 1) {
+    DCHECK_NE(imm_s, 0x3fU);
+    uint64_t bits = BIT_MASK(imm_s + 1);
+    return RotateRight(bits, imm_r, 64);
+  } else {
+    DCHECK_NE((imm_s >> 1), 0x1fU);
+    for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
+      if ((imm_s & width) == 0) {
+        unsigned mask = (unsigned)(width - 1);
+        DCHECK_NE((imm_s & mask), mask);
+        uint64_t bits = BIT_MASK((imm_s & mask) + 1);
+        return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width);
+      }
+    }
+  }
+  return 0;
+}
+
+/**
+ * @brief Decode an 8-bit single precision floating point immediate encoded with EncodeImmSingle().
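+ * @note For example, DecodeImmSingle(0x70) returns 1.0f, the inverse of
+ *   EncodeImmSingle(0x3f800000) == 0x70.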
+ */
+static float DecodeImmSingle(uint8_t small_float) {
+  int mantissa = (small_float & 0x0f) + 0x10;
+  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
+  float signed_mantissa = static_cast<float>(sign*mantissa);
+  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
+  return signed_mantissa*static_cast<float>(1 << exponent)*0.0078125f;
+}
+
+static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+                                 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+/*
+ * Interpret a format string and build a displayable string.
+ * See format key in assemble_arm64.cc.
+ */
+std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
+  std::string buf;
+  const char* fmt_end = &fmt[strlen(fmt)];
+  char tbuf[256];
+  const char* name;
+  char nc;
+  while (fmt < fmt_end) {
+    int operand;
+    if (*fmt == '!') {
+      fmt++;
+      DCHECK_LT(fmt, fmt_end);
+      nc = *fmt++;
+      if (nc == '!') {
+        strcpy(tbuf, "!");
+      } else {
+         DCHECK_LT(fmt, fmt_end);
+         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
+         operand = lir->operands[nc-'0'];
+         switch (*fmt++) {
+           case 'e':  {
+               // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and
+               // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0"
+               int omittable = ((IS_WIDE(lir->opcode)) ? EncodeExtend(kA64Uxtx, 0) :
+                                EncodeExtend(kA64Uxtw, 0));
+               if (LIKELY(operand == omittable)) {
+                 strcpy(tbuf, "");
+               } else {
+                 DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+               }
+             }
+             break;
+           case 'o':
+             // Omit ", lsl #0"
+             if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) {
+               strcpy(tbuf, "");
+             } else {
+               DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+             }
+             break;
+           case 'B':
+             switch (operand) {
+               case kSY:
+                 name = "sy";
+                 break;
+               case kST:
+                 name = "st";
+                 break;
+               case kISH:
+                 name = "ish";
+                 break;
+               case kISHST:
+                 name = "ishst";
+                 break;
+               case kNSH:
+                 name = "nsh";
+                 break;
+               case kNSHST:
+                 name = "nshst";
+                 break;
+               default:
+                 name = "DecodeError2";
+                 break;
+             }
+             strcpy(tbuf, name);
+             break;
+           case 's':
+             snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
+             break;
+           case 'S':
+             snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK);
+             break;
+           case 'f':
+             snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
+                      operand & ARM_FP_REG_MASK);
+             break;
+           case 'l': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               uint64_t imm = DecodeLogicalImmediate(is_wide, operand);
+               snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm);
+             }
+             break;
+           case 'I':
+             snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand));
+             break;
+           case 'M':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand);
+             break;
+           case 'd':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand);
+             break;
+           case 'w':
+             if (LIKELY(operand != rwzr))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wzr");
+             break;
+           case 'W':
+             if (LIKELY(operand != rwsp))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wsp");
+             break;
+           case 'x':
+             if (LIKELY(operand != rxzr))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "xzr");
+             break;
+           case 'X':
+             if (LIKELY(operand != rsp))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "sp");
+             break;
+           case 'D':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4));
+             break;
+           case 'E':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
+             break;
+           case 'F':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
+             break;
+           case 'G':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               strcpy(tbuf, (IS_WIDE(lir->opcode)) ? ", lsl #3" : ", lsl #2");
+             break;
+           case 'c':
+             strcpy(tbuf, cc_names[operand]);
+             break;
+           case 't':
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2),
+                 lir->target);
+             break;
+           case 'r': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwzr && operand != rxzr)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "xzr" : "wzr");
+               }
+             }
+             break;
+           case 'R': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwsp && operand != rsp)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "sp" : "wsp");
+               }
+             }
+             break;
+           case 'p':
+             snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand,
+                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand);
+             break;
+           case 'T':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else if (operand == 1)
+               strcpy(tbuf, ", lsl #12");
+             else
+               strcpy(tbuf, ", DecodeError3");
+             break;
+           default:
+             strcpy(tbuf, "DecodeError1");
+             break;
+        }
+        buf += tbuf;
+      }
+    } else {
+       buf += *fmt++;
+    }
+  }
+  return buf;
+}
+
+void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) {
+  char buf[256];
+  buf[0] = 0;
+
+  if (mask == ENCODE_ALL) {
+    strcpy(buf, "all");
+  } else {
+    char num[8];
+    int i;
+
+    for (i = 0; i < kArmRegEnd; i++) {
+      if (mask & (1ULL << i)) {
+        snprintf(num, arraysize(num), "%d ", i);
+        strcat(buf, num);
+      }
+    }
+
+    if (mask & ENCODE_CCODE) {
+      strcat(buf, "cc ");
+    }
+    if (mask & ENCODE_FP_STATUS) {
+      strcat(buf, "fpcc ");
+    }
+
+    /* Memory bits */
+    if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
+    }
+    if (mask & ENCODE_LITERAL) {
+      strcat(buf, "lit ");
+    }
+
+    if (mask & ENCODE_HEAP_REF) {
+      strcat(buf, "heap ");
+    }
+    if (mask & ENCODE_MUST_NOT_ALIAS) {
+      strcat(buf, "noalias ");
+    }
+  }
+  if (buf[0]) {
+    LOG(INFO) << prefix << ": " << buf;
+  }
+}
+
+bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
+  return (lir->opcode == kA64B1t);
+}
+
+Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+    : Mir2Lir(cu, mir_graph, arena) {
+  // Sanity check - make sure encoding map lines up.
+  for (int i = 0; i < kA64Last; i++) {
+    if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
+      LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
+                 << " is wrong: expecting " << i << ", seeing "
+                 << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
+    }
+  }
+}
+
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena) {
+  return new Arm64Mir2Lir(cu, mir_graph, arena);
+}
+
+// Alloc a pair of core registers, or a double.
+RegStorage Arm64Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempDouble();
+  } else {
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    return RegStorage::MakeRegPair(low_reg, high_reg);
+  }
+}
+
+RegStorage Arm64Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg))
+    return AllocTempSingle();
+  return AllocTemp();
+}
+
+void Arm64Mir2Lir::CompilerInitializeRegAlloc() {
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
+                                        core_temps, sp_temps, dp_temps);
+
+  // Target-specific adjustments.
+
+  // Alias single precision floats to appropriate half of overlapping double.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    int sp_reg_num = info->GetReg().GetRegNum();
+    int dp_reg_num = sp_reg_num >> 1;
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // Double precision register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect the single precision register's master storage to the backing double.
+    info->SetMaster(dp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+    if (sp_reg_num & 1) {
+      // For odd singles, change to use the high word of the backing double.
+      info->SetStorageMask(0x2);
+    }
+  }
+
+  // TODO: re-enable this when we can safely save r4 over the suspension code path.
+  bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
+  if (no_suspend) {
+    GetRegInfo(rs_rA64_SUSPEND)->MarkFree();
+  }
+
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust when we roll to hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 0;
+  reg_pool_->next_dp_reg_ = 0;
+}
+
+void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
+  LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64";
+}
+
+/*
+ * TUNING: is this a true leaf?  We can't just use METHOD_IS_LEAF to determine that, as some
+ * instructions might call out to C/assembly helper functions.  Until that
+ * machinery is in place, always spill lr.
+ */
+
+void Arm64Mir2Lir::AdjustSpillMask() {
+  core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum());
+  num_core_spills_++;
+}
+
+/*
+ * Mark a callee-save fp register as promoted.  Note that
+ * vpush/vpop uses contiguous register lists so we must
+ * include any holes in the mask.  Associate holes with
+ * Dalvik register INVALID_VREG (0xFFFFU).
+ */
+void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
+  DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
+  int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
+  // Ensure fp_vmap_table is large enough
+  int table_size = fp_vmap_table_.size();
+  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
+    fp_vmap_table_.push_back(INVALID_VREG);
+  }
+  // Add the current mapping
+  fp_vmap_table_[adjusted_reg_num] = v_reg;
+  // The size of fp_vmap_table is the high-water mark; use it to set the mask.
+  num_fp_spills_ = fp_vmap_table_.size();
+  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
+}
+
+void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
+  // TEMP: perform as 2 singles.
+  int reg_num = reg.GetRegNum() << 1;
+  RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
+  RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
+  MarkPreservedSingle(v_reg, lo);
+  MarkPreservedSingle(v_reg + 1, hi);
+}
+
+/* Clobber all regs that might be used by an external C call */
+void Arm64Mir2Lir::ClobberCallerSave() {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  Clobber(rs_x0);
+  Clobber(rs_x1);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  Clobber(rs_x12);
+  Clobber(rs_x30);
+  Clobber(rs_f0);
+  Clobber(rs_f1);
+  Clobber(rs_f2);
+  Clobber(rs_f3);
+  Clobber(rs_f4);
+  Clobber(rs_f5);
+  Clobber(rs_f6);
+  Clobber(rs_f7);
+  Clobber(rs_f8);
+  Clobber(rs_f9);
+  Clobber(rs_f10);
+  Clobber(rs_f11);
+  Clobber(rs_f12);
+  Clobber(rs_f13);
+  Clobber(rs_f14);
+  Clobber(rs_f15);
+}
+
+RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
+  RegLocation res = LocCReturnWide();
+  res.reg.SetReg(rx2);
+  res.reg.SetHighReg(rx3);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  MarkInUse(rs_x2);
+  MarkInUse(rs_x3);
+  MarkWide(res.reg);
+  return res;
+}
+
+RegLocation Arm64Mir2Lir::GetReturnAlt() {
+  RegLocation res = LocCReturn();
+  res.reg.SetReg(rx1);
+  Clobber(rs_x1);
+  MarkInUse(rs_x1);
+  return res;
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::LockCallTemps() {
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::FreeCallTemps() {
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
+}
+
+RegStorage Arm64Mir2Lir::LoadHelper(A64ThreadOffset offset) {
+  // TODO(Arm64): use LoadWordDisp instead.
+  //   e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR);
+  LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64);
+  return rs_rA64_LR;
+}
+
+LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
+  RegStorage tmp = rs_x0;
+  LoadWordDisp(rs_rA64_SELF, A64_THREAD_SUSPEND_TRIGGER_OFFSET, tmp);
+  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
+  return load2;
+}
+
+uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
+}
+
+/*
+ * Somewhat messy code here.  We want to allocate a pair of contiguous
+ * physical single-precision floating point registers starting with
+ * an even numbered reg.  It is possible that the paired s_reg (s_reg+1)
+ * has already been allocated - try to fit if possible.  Fail to
+ * allocate if we can't meet the requirements for the pair of
+ * s_reg<=sX[even] & (s_reg+1)<= sX+1.
+ */
+// TODO: needs rewrite to support non-backed 64-bit float regs.
+RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) {
+  RegStorage res;
+  int v_reg = mir_graph_->SRegToVReg(s_reg);
+  int p_map_idx = SRegToPMap(s_reg);
+  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
+    // Upper reg is already allocated.  Can we fit?
+    int high_reg = promotion_map_[p_map_idx+1].FpReg;
+    if ((high_reg & 1) == 0) {
+      // High reg is even - fail.
+      return res;  // Invalid.
+    }
+    // Is the low reg of the pair free?
+    // FIXME: rework.
+    RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
+    if (p->InUse() || p->IsTemp()) {
+      // Already allocated or not preserved - fail.
+      return res;  // Invalid.
+    }
+    // OK - good to go.
+    res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
+    p->MarkInUse();
+    MarkPreservedSingle(v_reg, p->GetReg());
+  } else {
+    /*
+     * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
+     * different underlying physical registers.
+     */
+    GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
+    for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+      if (!info->IsTemp() && !info->InUse()) {
+        res = info->GetReg();
+        info->MarkInUse();
+        MarkPreservedDouble(v_reg, info->GetReg());
+        break;
+      }
+    }
+  }
+  if (res.Valid()) {
+    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg();
+    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg();
+  }
+  return res;
+}
+
+// TODO(Arm64): reuse info in QuickArgumentVisitor?
+static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
+                                    OpSize* op_size) {
+  if (loc->fp) {
+    int n = *num_fpr_used;
+    if (n < 8) {
+      *num_fpr_used = n + 1;
+      RegStorage::RegStorageKind reg_kind;
+      if (loc->wide) {
+        *op_size = kDouble;
+        reg_kind = RegStorage::k64BitSolo;
+      } else {
+        *op_size = kSingle;
+        reg_kind = RegStorage::k32BitSolo;
+      }
+      return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
+    }
+  } else {
+    int n = *num_gpr_used;
+    if (n < 7) {
+      *num_gpr_used = n + 1;
+      if (loc->wide) {
+        *op_size = k64;
+        return RegStorage::Solo64(n);
+      } else {
+        *op_size = k32;
+        return RegStorage::Solo32(n);
+      }
+    }
+  }
+
+  return RegStorage::InvalidReg();
+}
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+  int num_gpr_used = 1;
+  int num_fpr_used = 0;
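+  // Core argument counting starts at 1 because x0 (kArg0) carries the incoming Method*.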
+
+  /*
+   * Dummy up a RegLocation for the incoming Method*.
+   * It will attempt to keep kArg0 live (or copy it to its home location
+   * if promoted).
+   */
+  RegLocation rl_src = rl_method;
+  rl_src.location = kLocPhysReg;
+  rl_src.reg = TargetReg(kArg0);
+  rl_src.home = false;
+  MarkLive(rl_src);
+
+  // TODO(Arm64): compress the Method pointer?
+  StoreValueWide(rl_method, rl_src);
+
+  // If Method* has been promoted, explicitly flush
+  if (rl_method.location == kLocPhysReg) {
+    StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+  }
+
+  if (cu_->num_ins == 0) {
+    return;
+  }
+
+  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegLocation* t_loc = &ArgLocs[i];
+    OpSize op_size;
+    RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
+
+    if (reg.Valid()) {
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+      } else {
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size);
+        if (reg.Is64Bit()) {
+          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
+            LOG(FATAL) << "64-bit value stored in non-consecutive 4-byte slots";
+          }
+          i += 1;
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
+                     RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
+
+int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
+                              NextCallInsn next_call_insn,
+                              const MethodReference& target_method,
+                              uint32_t vtable_idx, uintptr_t direct_code,
+                              uintptr_t direct_method, InvokeType type, bool skip_this) {
+  int last_arg_reg = TargetReg(kArg3).GetReg();
+  int next_reg = TargetReg(kArg1).GetReg();
+  int next_arg = 0;
+  if (skip_this) {
+    next_reg++;
+    next_arg++;
+  }
+  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
+    RegLocation rl_arg = info->args[next_arg++];
+    rl_arg = UpdateRawLoc(rl_arg);
+    if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
+      RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
+      LoadValueDirectWideFixed(rl_arg, r_tmp);
+      next_reg++;
+      next_arg++;
+    } else {
+      if (rl_arg.wide) {
+        rl_arg = NarrowRegLoc(rl_arg);
+        rl_arg.is_const = false;
+      }
+      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
+    }
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                direct_code, direct_method, type);
+  }
+  return call_state;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
new file mode 100644
index 0000000..e46e201
--- /dev/null
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -0,0 +1,1072 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+/* This file contains codegen for the A64 ISA. */
+
+static int32_t EncodeImmSingle(uint32_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
+   *
+   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
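+   *
+   * For instance, 1.0f (bits 0x3f800000) fits this form and encodes as 0x70;
+   * any value with nonzero bits[18..0] is rejected (returns -1).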
+   */
+
+  // bits[18..0] are cleared.
+  if ((bits & 0x0007ffff) != 0)
+    return -1;
+
+  // bits[29..25] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 16) & 0x3e00;
+  if (b_pattern != 0 && b_pattern != 0x3e00)
+    return -1;
+
+  // bit[30] and bit[29] are opposite.
+  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
+    return -1;
+
+  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
+}
+
+static int32_t EncodeImmDouble(uint64_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+   *   0000.0000.0000.0000.0000.0000.0000.0000
+   *
+   * where B = not(b).
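+   *
+   * For instance, the double 1.0 (bits 0x3ff0000000000000) encodes as 0x70.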
+   */
+
+  // bits[47..0] are cleared.
+  if ((bits & UINT64_C(0xffffffffffff)) != 0)
+    return -1;
+
+  // bits[61..54] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+  if (b_pattern != 0 && b_pattern != 0x3fc0)
+    return -1;
+
+  // bit[62] and bit[61] are opposite.
+  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
+    return -1;
+
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
+}
+
+LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) {
+  DCHECK(RegStorage::IsSingle(r_dest));
+  if (value == 0) {
+    return NewLIR2(kA64Fmov2sw, r_dest, rwzr);
+  } else {
+    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm);
+    }
+  }
+
+  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&literal_list_, value);
+  }
+
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
+                            r_dest, 0, 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
+}
+
+LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) {
+  DCHECK(RegStorage::IsDouble(r_dest));
+  if (value == 0) {
+    return NewLIR2(kA64Fmov2Sx, r_dest, rwzr);
+  } else {
+    int32_t encoded_imm = EncodeImmDouble(value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm);
+    }
+  }
+
+  // No short form - load from the literal pool.
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
+  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+  if (data_target == NULL) {
+    data_target = AddWideData(&literal_list_, val_lo, val_hi);
+  }
+
+  DCHECK(RegStorage::IsFloat(r_dest));
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
+                            r_dest, 0, 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
+}
+
+static int CountLeadingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_clzl(value) : __builtin_clz((uint32_t)value);
+}
+
+static int CountTrailingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_ctzl(value) : __builtin_ctz((uint32_t)value);
+}
+
+static int CountSetBits(bool is_wide, uint64_t value) {
+  return ((is_wide) ?
+          __builtin_popcountl(value) : __builtin_popcount((uint32_t)value));
+}
+
+/**
+ * @brief Try encoding an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
+ * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
+ *   32-bit if @p is_wide is false.
+ * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
+ * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
+ */
+int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
+  unsigned n, imm_s, imm_r;
+
+  // Logical immediates are encoded using parameters n, imm_s and imm_r using
+  // the following table:
+  //
+  //  N   imms    immr    size        S             R
+  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
+  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
+  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
+  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
+  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
+  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+  //
+  // To test if an arbitrary immediate can be encoded using this scheme, an
+  // iterative algorithm is used.
+  //
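+  // For example (worked by hand from the table above): 0x0f0f0f0f with
+  // is_wide false is an 8-bit pattern of four set bits with no rotation,
+  // so it encodes as n=0, imm_s=0b110011, imm_r=0, i.e. 0x033.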
+
+  // 1. If the value has all set or all clear bits, it can't be encoded.
+  if (value == 0 || value == ~UINT64_C(0) ||
+      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
+    return -1;
+  }
+
+  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
+  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
+  unsigned trail_zero = CountTrailingZeros(is_wide, value);
+  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
+  unsigned set_bits   = CountSetBits(is_wide, value);
+
+  // The fixed bits in the immediate s field.
+  // If width == 64 (X reg), start at 0xFFFFFF80.
+  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
+  // widths won't be executed.
+  unsigned width = (is_wide) ? 64 : 32;
+  int imm_s_fixed = (is_wide) ? -128 : -64;
+  int imm_s_mask = 0x3f;
+
+  for (;;) {
+    // 2. If the value is two bits wide, it can be encoded.
+    if (width == 2) {
+      n = 0;
+      imm_s = 0x3C;
+      imm_r = (value & 3) - 1;
+      break;
+    }
+
+    n = (width == 64) ? 1 : 0;
+    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
+    if ((lead_zero + set_bits) == width) {
+      imm_r = 0;
+    } else {
+      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
+    }
+
+    // 3. If the sum of leading zeros, trailing zeros and set bits is
+    //    equal to the bit width of the value, it can be encoded.
+    if (lead_zero + trail_zero + set_bits == width) {
+      break;
+    }
+
+    // 4. If the sum of leading ones, trailing ones and unset bits in the
+    //    value is equal to the bit width of the value, it can be encoded.
+    if (lead_one + trail_one + (width - set_bits) == width) {
+      break;
+    }
+
+    // 5. If the most-significant half of the bitwise value is equal to
+    //    the least-significant half, return to step 2 using the
+    //    least-significant half of the value.
+    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
+    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
+      width >>= 1;
+      set_bits >>= 1;
+      imm_s_fixed >>= 1;
+      continue;
+    }
+
+    // 6. Otherwise, the value can't be encoded.
+    return -1;
+  }
+
+  return (n << 12 | imm_r << 6 | imm_s);
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
+  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
+  return EncodeImmSingle(value) >= 0;
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
+  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
+  return EncodeImmDouble(value) >= 0;
+}
+
+/*
+ * Load an immediate using a single instruction when possible; otherwise
+ * use a pair of movz and movk instructions.
+ *
+ * No additional register clobbering operation is performed. Use this version when
+ * 1) r_dest is freshly returned from AllocTemp or
+ * 2) The codegen is under fixed register usage
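+ *
+ * For example, 0x12345678 is not a logical immediate and neither half-word is
+ * 0x0000 or 0xffff, so it is materialized as "movz wD, #0x5678" followed by
+ * "movk wD, #0x1234, lsl #16".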
+ */
+LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
+  LIR* res;
+
+  if (r_dest.IsFloat()) {
+    return LoadFPConstantValue(r_dest.GetReg(), value);
+  }
+
+  // Loading SP/ZR with an immediate is not supported.
+  DCHECK_NE(r_dest.GetReg(), rwsp);
+  DCHECK_NE(r_dest.GetReg(), rwzr);
+
+  // Compute how many movk, movz instructions are needed to load the value.
+  uint16_t high_bits = High16Bits(value);
+  uint16_t low_bits = Low16Bits(value);
+
+  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
+  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
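+  // low_fast/high_fast are true when the half-word is 0x0000 or 0xffff, i.e. when
+  // a single mov/mvn from wzr, movz or movn is enough to materialize the value.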
+
+  if (LIKELY(low_fast || high_fast)) {
+    // 1 instruction is enough to load the immediate.
+    if (LIKELY(low_bits == high_bits)) {
+      // Value is either 0 or -1: we can just use wzr.
+      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
+      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
+    } else {
+      uint16_t uniform_bits, useful_bits;
+      int shift;
+
+      if (LIKELY(high_fast)) {
+        shift = 0;
+        uniform_bits = high_bits;
+        useful_bits = low_bits;
+      } else {
+        shift = 1;
+        uniform_bits = low_bits;
+        useful_bits = high_bits;
+      }
+
+      if (UNLIKELY(uniform_bits != 0)) {
+        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
+      } else {
+        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
+      }
+    }
+  } else {
+    // movk, movz require 2 instructions. Try detecting logical immediates.
+    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
+    if (log_imm >= 0) {
+      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
+    } else {
+      // Use 2 instructions.
+      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
+      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
+    }
+  }
+
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
+  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched  during assembly */);
+  res->target = target;
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
+  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
+                        0 /* offset to be patched */);
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
+  ArmOpcode opcode = kA64Brk1d;
+  switch (op) {
+    case kOpBlx:
+      opcode = kA64Blr1x;
+      break;
+    // TODO(Arm64): port kThumbBx.
+    // case kOpBx:
+    //   opcode = kThumbBx;
+    //   break;
+    default:
+      LOG(FATAL) << "Bad opcode " << op;
+  }
+  return NewLIR1(opcode, r_dest_src.GetReg());
+}
+
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2,
+                                 int shift, bool is_wide) {
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (OP_KIND_UNWIDE(op)) {
+    case kOpCmn:
+      opcode = kA64Cmn3Rro;
+      break;
+    case kOpCmp:
+      // TODO(Arm64): check the instruction above: "cmp w0, w1" is rendered as "cmp w0, w1, uxtb".
+      opcode = kA64Cmp3Rro;
+      break;
+    case kOpMov:
+      opcode = kA64Mov2rr;
+      break;
+    case kOpMvn:
+      opcode = kA64Mvn2rr;
+      break;
+    case kOpNeg:
+      opcode = kA64Neg3rro;
+      break;
+    case kOpTst:
+      opcode = kA64Tst3rro;
+      break;
+    case kOpRev:
+      DCHECK_EQ(shift, 0);
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2);
+      break;
+    case kOpRevsh:
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2);
+      break;
+    case kOp2Byte:
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use sbfm directly.
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7);
+    case kOp2Short:
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // For now we use sbfm rather than its alias, sbfx.
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+    case kOp2Char:
+      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use ubfm directly.
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+    default:
+      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
+  }
+
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR2(opcode | wide, r_dest_src1, r_src2);
+  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+    if (kind == kFmtExtend || kind == kFmtShift) {
+      DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift));
+      return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift);
+    }
+  }
+
+  LOG(FATAL) << "Unexpected encoding operand count";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
+  return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT,
+                       r_dest_src1.Is64Bit());
+}
+
+LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
+  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1,
+                                    int r_src2, int shift, bool is_wide) {
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (OP_KIND_UNWIDE(op)) {
+    case kOpAdd:
+      opcode = kA64Add4rrro;
+      break;
+    case kOpSub:
+      opcode = kA64Sub4rrro;
+      break;
+    // case kOpRsub:
+    //   opcode = kA64RsubWWW;
+    //   break;
+    case kOpAdc:
+      opcode = kA64Adc3rrr;
+      break;
+    case kOpAnd:
+      opcode = kA64And4rrro;
+      break;
+    case kOpXor:
+      opcode = kA64Eor4rrro;
+      break;
+    case kOpMul:
+      opcode = kA64Mul3rrr;
+      break;
+    case kOpDiv:
+      opcode = kA64Sdiv3rrr;
+      break;
+    case kOpOr:
+      opcode = kA64Orr4rrro;
+      break;
+    case kOpSbc:
+      opcode = kA64Sbc3rrr;
+      break;
+    case kOpLsl:
+      opcode = kA64Lsl3rrr;
+      break;
+    case kOpLsr:
+      opcode = kA64Lsr3rrr;
+      break;
+    case kOpAsr:
+      opcode = kA64Asr3rrr;
+      break;
+    case kOpRor:
+      opcode = kA64Ror3rrr;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+
+  // The instructions above fall into two kinds:
+  // - 4-operands instructions, where the last operand is a shift/extend immediate,
+  // - 3-operands instructions with no shift/extend.
+  ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode;
+  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift);
+  } else {
+    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR3(widened_opcode, r_dest, r_src1, r_src2);
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
+  return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT);
+}
+
+LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
+  LIR* res;
+  bool neg = (value < 0);
+  int64_t abs_value = (neg) ? -value : value;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int32_t log_imm = -1;
+  bool is_wide = OP_KIND_IS_WIDE(op);
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+
+  switch (OP_KIND_UNWIDE(op)) {
+    case kOpLsl: {
+      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
+      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
+      // For now, we just use ubfm directly.
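+      // For example, "lsl w1, w2, #4" is "ubfm w1, w2, #28, #27".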
+      int max_value = (is_wide) ? 63 : 31;
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
+                     (-value) & max_value, max_value - value);
+    }
+    case kOpLsr:
+      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpAsr:
+      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpRor:
+      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
+      // For now, we just use extr directly.
+      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
+                     value);
+    case kOpAdd:
+      neg = !neg;
+      // Note: intentional fallthrough
+    case kOpSub:
+      // Add and sub below read/write sp rather than xzr.
+      if (abs_value < 0x1000) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
+      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
+      } else {
+        log_imm = -1;
+        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
+      }
+      break;
+    // case kOpRsub:
+    //   opcode = kThumb2RsubRRI8M;
+    //   alt_opcode = kThumb2RsubRRR;
+    //   break;
+    case kOpAdc:
+      log_imm = -1;
+      alt_opcode = kA64Adc3rrr;
+      break;
+    case kOpSbc:
+      log_imm = -1;
+      alt_opcode = kA64Sbc3rrr;
+      break;
+    case kOpOr:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Orr3Rrl;
+      alt_opcode = kA64Orr4rrro;
+      break;
+    case kOpAnd:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64And3Rrl;
+      alt_opcode = kA64And4rrro;
+      break;
+    case kOpXor:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Eor3Rrl;
+      alt_opcode = kA64Eor4rrro;
+      break;
+    case kOpMul:
+      // TUNING: power of 2, shift & add
+      log_imm = -1;
+      alt_opcode = kA64Mul3rrr;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+  }
+
+  if (log_imm >= 0) {
+    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
+  } else {
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, value);
+    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
+      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+    else
+      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+    FreeTemp(r_scratch);
+    return res;
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
+  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false);
+}
+
+LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) {
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode neg_opcode = kA64Brk1d;
+  bool shift;
+  bool neg = (value < 0);
+  uint64_t abs_value = (neg) ? -value : value;
+
+  if (LIKELY(abs_value < 0x1000)) {
+    // abs_value is a 12-bit immediate.
+    shift = false;
+  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+    // abs_value is a shifted 12-bit immediate.
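+    // (e.g. 0x5000 becomes immediate 5 with the LSL #12 flag set below).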
+    shift = true;
+    abs_value >>= 12;
+  } else {
+    RegStorage r_tmp = AllocTemp();
+    LIR* res = LoadConstant(r_tmp, value);
+    OpRegReg(op, r_dest_src1, r_tmp);
+    FreeTemp(r_tmp);
+    return res;
+  }
+
+  switch (OP_KIND_UNWIDE(op)) {
+    case kOpAdd:
+      neg_opcode = kA64Sub4RRdT;
+      opcode = kA64Add4RRdT;
+      break;
+    case kOpSub:
+      neg_opcode = kA64Add4RRdT;
+      opcode = kA64Sub4RRdT;
+      break;
+    case kOpCmp:
+      neg_opcode = kA64Cmn3RdT;
+      opcode = kA64Cmp3RdT;
+      break;
+    default:
+      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
+      break;
+  }
+
+  if (UNLIKELY(neg))
+    opcode = neg_opcode;
+
+  if (EncodingMap[opcode].flags & IS_QUAD_OP)
+    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
+                   (shift) ? 1 : 0);
+  else
+    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
+}
+
+LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
+  if (r_dest.IsFloat()) {
+    return LoadFPConstantValueWide(r_dest.GetReg(), value);
+  } else {
+    // TODO(Arm64): check whether we can load the immediate with a short form.
+    //   e.g. via movz, movk or via logical immediate.
+
+    // No short form - load from the literal pool.
+    int32_t val_lo = Low32Bits(value);
+    int32_t val_hi = High32Bits(value);
+    LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+    if (data_target == NULL) {
+      data_target = AddWideData(&literal_list_, val_lo, val_hi);
+    }
+
+    LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
+                      r_dest.GetReg(), 0, 0, 0, 0, data_target);
+    SetMemRefType(res, true, kLiteral);
+    AppendLIR(res);
+    return res;
+  }
+}
+
+int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
+  return ((shift_type & 0x3) << 7) | (amount & 0x1f);
+}
+
+int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
+  return  (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
+}
+
+bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
+  return ((1 << 6) & encoded_value) != 0;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+                                   int scale, OpSize size) {
+  LIR* load;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
+
+  if (r_dest.IsFloat()) {
+    bool is_double = r_dest.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_dest.IsSingle());
+
+    // If r_dest is a single, then size must be either k32 or kSingle.
+    // If r_dest is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG,
+                   r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  switch (size) {
+    case kDouble:
+    case kWord:
+    case k64:
+      wide = kA64Wide;
+      // Intentional fall-through.
+    case kSingle:
+    case k32:
+    case kReference:
+      opcode = kA64Ldr4rXxG;
+      break;
+    case kUnsignedHalf:
+      opcode = kA64Ldrh4wXxd;
+      break;
+    case kSignedHalf:
+      opcode = kA64Ldrsh4rXxd;
+      break;
+    case kUnsignedByte:
+      opcode = kA64Ldrb3wXx;
+      break;
+    case kSignedByte:
+      opcode = kA64Ldrsb3rXx;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. ldrb, ldrsb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
+    DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2));
+    load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
+                   (scale != 0) ? 1 : 0);
+  }
+
+  return load;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+                                    int scale, OpSize size) {
+  LIR* store;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
+
+  if (r_src.IsFloat()) {
+    bool is_double = r_src.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_src.IsSingle());
+
+    // If r_src is a single, then size must be either k32 or kSingle.
+    // If r_src is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG,
+                   r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      opcode = kA64Str4rXxG;
+      wide = kA64Wide;
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      opcode = kA64Str4rXxG;
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      opcode = kA64Strh4wXxd;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kA64Strb3wXx;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. strb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
+    store = NewLIR4(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  return store;
+}
+
+/*
+ * Load value from base + displacement.  Optionally perform null check
+ * on base (which must have an associated s_reg and MIR).  If not
+ * performing null check, incoming MIR can be null.
+ */
+LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
+                                    OpSize size) {
+  LIR* load = NULL;
+  ArmOpcode opcode = kA64Brk1d;
+  bool short_form = false;
+  int encoded_disp = displacement;
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      DCHECK_EQ(encoded_disp & 0x3, 0);
+      if (r_dest.IsFloat()) {
+        // Currently double values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled load.
+          opcode = FWIDE(kA64Ldr3fXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled load.
+          opcode = FWIDE(kA64Ldur3fXd);
+          short_form = true;
+        } else {
+          short_form = false;
+        }
+      } else {
+        // Currently long values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled load.
+          opcode = FWIDE(kA64Ldr3rXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled load.
+          opcode = FWIDE(kA64Ldur3rXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
+      }
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      if (r_dest.IsFloat()) {
+        opcode = kA64Ldr3fXD;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+        break;
+      }
+      if (displacement <= 16380 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x3), 0);
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kA64Ldr3rXD;
+      }
+      break;
+    case kUnsignedHalf:
+      if (displacement < 64 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x1), 0);
+        short_form = true;
+        encoded_disp >>= 1;
+        opcode = kA64Ldrh3wXF;
+      } else if (displacement < 4092 && displacement >= 0) {
+        short_form = true;
+        opcode = kA64Ldrh3wXF;
+      }
+      break;
+    case kSignedHalf:
+      short_form = true;
+      opcode = kA64Ldrsh3rXF;
+      break;
+    case kUnsignedByte:
+      short_form = true;
+      opcode = kA64Ldrb3wXd;
+      break;
+    case kSignedByte:
+      short_form = true;
+      opcode = kA64Ldrsb3rXd;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (short_form) {
+    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+  } else {
+    RegStorage reg_offset = AllocTemp();
+    LoadConstant(reg_offset, encoded_disp);
+    if (r_dest.IsFloat()) {
+      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+      OpRegReg(kOpAdd, reg_offset, r_base);
+      load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
+    } else {
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
+    }
+    FreeTemp(reg_offset);
+  }
+
+  // TODO: in future may need to differentiate Dalvik accesses w/ spills
+  if (r_base == rs_rA64_SP) {
+    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  }
+  return load;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                                OpSize size) {
+  return LoadBaseDispBody(r_base, displacement, r_dest, size);
+}
+
+
+LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
+                                     OpSize size) {
+  LIR* store = NULL;
+  ArmOpcode opcode = kA64Brk1d;
+  bool short_form = false;
+  int encoded_disp = displacement;
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      DCHECK_EQ(encoded_disp & 0x3, 0);
+      if (r_src.IsFloat()) {
+        // Currently double values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled store.
+          opcode = FWIDE(kA64Str3fXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled store.
+          opcode = FWIDE(kA64Stur3fXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
+      } else {
+        // Currently long values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled store.
+          opcode = FWIDE(kA64Str3rXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled store.
+          opcode = FWIDE(kA64Stur3rXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
+      }
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      if (r_src.IsFloat()) {
+        DCHECK(r_src.IsSingle());
+        DCHECK_EQ(encoded_disp & 0x3, 0);
+        opcode = kA64Str3fXD;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
+        break;
+      }
+
+      if (displacement <= 16380 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x3), 0);
+        short_form = true;
+        encoded_disp >>= 2;
+        opcode = kA64Str3rXD;
+      }
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      DCHECK_EQ((displacement & 0x1), 0);
+      short_form = true;
+      encoded_disp >>= 1;
+      opcode = kA64Strh3wXF;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      short_form = true;
+      opcode = kA64Strb3wXd;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (short_form) {
+    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+  } else {
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, encoded_disp);
+    if (r_src.IsFloat()) {
+      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+      OpRegReg(kOpAdd, r_scratch, r_base);
+      store = StoreBaseDispBody(r_scratch, 0, r_src, size);
+    } else {
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+    }
+    FreeTemp(r_scratch);
+  }
+
+  // TODO: In future, may need to differentiate Dalvik & spill accesses
+  if (r_base == rs_rA64_SP) {
+    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  }
+  return store;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                               OpSize size) {
+  return StoreBaseDispBody(r_base, displacement, r_src, size);
+}
+
+LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, A64ThreadOffset thread_offset) {
+  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
+  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                        int displacement, RegStorage r_src, OpSize size) {
+  LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
+  LOG(FATAL) << "Unexpected use of OpRegMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                       int displacement, RegStorage r_dest, OpSize size) {
+  LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm64";
+  return NULL;
+}
+
+}  // namespace art
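
For reference, a minimal standalone sketch (not ART code; the names below are illustrative) of the displacement classification the k64/kDouble cases above implement: A64 LDR/STR take an unsigned 12-bit immediate scaled by the access size (at most 4095 * 8 = 32760 for 8-byte accesses), LDUR/STUR take a signed 9-bit unscaled immediate, and anything else falls back to the long sequence through a temp register.

#include <cassert>

enum class A64Form { kScaledImm12, kUnscaledImm9, kLongSequence };

constexpr bool IsSignedImm9(int disp) { return disp >= -256 && disp <= 255; }

// Classify the displacement of an 8-byte load/store, mirroring the checks above.
constexpr A64Form Classify64BitDisp(int disp) {
  return ((disp & 0x7) == 0 && disp >= 0 && disp <= 32760) ? A64Form::kScaledImm12
       : IsSignedImm9(disp)                                ? A64Form::kUnscaledImm9
                                                           : A64Form::kLongSequence;
}

int main() {
  assert(Classify64BitDisp(0) == A64Form::kScaledImm12);       // encoded as imm12 == 0
  assert(Classify64BitDisp(32760) == A64Form::kScaledImm12);   // imm12 == 4095 after >> 3
  assert(Classify64BitDisp(-8) == A64Form::kUnscaledImm9);     // negative offsets need LDUR/STUR
  assert(Classify64BitDisp(12) == A64Form::kUnscaledImm9);     // misaligned but fits in imm9
  assert(Classify64BitDisp(40000) == A64Form::kLongSequence);  // offset is built in a temp register
  return 0;
}
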
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 9f84e09..784dfaf 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -497,6 +497,7 @@
       case kX86_64:
         bx_offset = 0;
         break;
+      case kArm64:
       case kMips:
         bx_offset = tab_rec->anchor->offset;
         break;
@@ -558,7 +559,7 @@
 static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
                                             unsigned int element_size) {
   // Align to natural pointer size.
-  offset = (offset + (element_size - 1)) & ~(element_size - 1);
+  offset = RoundUp(offset, element_size);
   for (; lir != NULL; lir = lir->next) {
     lir->offset = offset;
     offset += element_size;
@@ -758,7 +759,7 @@
     tab_rec->offset = offset;
     offset += tab_rec->size;
     // word align
-    offset = (offset + 3) & ~3;
+    offset = RoundUp(offset, 4);
     }
   return offset;
 }
@@ -1027,7 +1028,7 @@
 
   UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
   CompiledMethod* result =
-      new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
+      new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
                          vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
   return result;
@@ -1049,14 +1050,13 @@
 
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
-  static const uint32_t kAlignMask = kStackAlignment - 1;
   uint32_t size = num_core_spills_ * GetBytesPerGprSpillLocation(cu_->instruction_set)
                   + num_fp_spills_ * GetBytesPerFprSpillLocation(cu_->instruction_set)
                   + sizeof(uint32_t)  // Filler.
                   + (cu_->num_regs + cu_->num_outs) * sizeof(uint32_t)
                   + GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
-  return (size + kAlignMask) & ~(kAlignMask);
+  return RoundUp(size, kStackAlignment);
 }
 
 /*
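
The alignment hunks here and elsewhere in this change replace the open-coded "(x + n - 1) & ~(n - 1)" idiom with RoundUp. A minimal sketch of the assumed semantics (the real helper is ART's templated utility; this stand-in only handles power-of-two alignments):

#include <cstdint>

// Round x up to the next multiple of n, where n is a power of two.
constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1) & ~(n - 1);
}

static_assert(RoundUp(0, 4) == 0, "aligned values are unchanged");
static_assert(RoundUp(5, 4) == 8, "5 rounds up to the next word");
static_assert(RoundUp(13, 16) == 16, "kStackAlignment-style rounding");
static_assert(RoundUp(32, 16) == 32, "exact multiples are unchanged");
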
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2cd17cc..83d5045 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -564,13 +564,8 @@
       // There might have been a store before this volatile one so insert StoreStore barrier.
       GenMemBarrier(kStoreStore);
     }
-    if (is_long_or_double) {
-      StoreBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    } else if (rl_src.ref) {
-      StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    } else {
-      Store32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    }
+    OpSize size = LoadStoreOpSize(is_long_or_double, rl_src.ref);
+    StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, size);
     if (field_info.IsVolatile()) {
       // A load might follow the volatile store so insert a StoreLoad barrier.
       GenMemBarrier(kStoreLoad);
@@ -646,13 +641,8 @@
     }
     RegLocation rl_result = EvalLoc(rl_dest, result_reg_kind, true);
 
-    if (is_long_or_double) {
-      LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, INVALID_SREG);
-    } else if (rl_result.ref) {
-      LoadRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
-    } else {
-      Load32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
-    }
+    OpSize size = LoadStoreOpSize(is_long_or_double, rl_result.ref);
+    LoadBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, size);
     FreeTemp(r_base);
 
     if (field_info.IsVolatile()) {
@@ -714,8 +704,7 @@
           result_reg_kind = kFPReg;
         }
         rl_result = EvalLoc(rl_dest, result_reg_kind, true);
-        LoadBaseDispWide(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg,
-                         rl_obj.s_reg_low);
+        LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, size);
         MarkPossibleNullPointerException(opt_flags);
         if (field_info.IsVolatile()) {
           // Without context sensitive analysis, we must issue the most conservative barriers.
@@ -727,7 +716,7 @@
         RegStorage reg_ptr = AllocTemp();
         OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
         rl_result = EvalLoc(rl_dest, reg_class, true);
-        LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+        LoadBaseDisp(reg_ptr, 0, rl_result.reg, size);
         MarkPossibleNullPointerException(opt_flags);
         if (field_info.IsVolatile()) {
           // Without context sensitive analysis, we must issue the most conservative barriers.
@@ -741,8 +730,7 @@
     } else {
       rl_result = EvalLoc(rl_dest, reg_class, true);
       GenNullCheck(rl_obj.reg, opt_flags);
-      LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32,
-                   rl_obj.s_reg_low);
+      LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32);
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
         // Without context sensitive analysis, we must issue the most conservative barriers.
@@ -791,7 +779,7 @@
         // There might have been a store before this volatile one so insert StoreStore barrier.
         GenMemBarrier(kStoreStore);
       }
-      StoreBaseDispWide(reg_ptr, 0, rl_src.reg);
+      StoreBaseDisp(reg_ptr, 0, rl_src.reg, size);
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
         // A load might follow the volatile store so insert a StoreLoad barrier.
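
The volatile-field paths above keep their barrier bracketing while switching to the size-parameterized StoreBaseDisp/LoadBaseDisp: a StoreStore barrier before a volatile store, and a StoreLoad barrier after it. A compilable stand-in of that pattern (EmitStore and EmitVolatileFieldStore are illustrative stubs, not the Mir2Lir API):

#include <cstdio>

enum MemBarrierKind { kStoreStore, kStoreLoad };

void GenMemBarrier(MemBarrierKind kind) { std::printf("barrier %d\n", kind); }
void EmitStore() { std::printf("StoreBaseDisp(...)\n"); }

// Barrier bracketing for a volatile field store, as described by the comments above.
void EmitVolatileFieldStore() {
  GenMemBarrier(kStoreStore);  // earlier stores must not be reordered past this store
  EmitStore();                 // the actual StoreBaseDisp(r_base, offset, r_src, size)
  GenMemBarrier(kStoreLoad);   // later loads must not be hoisted above this store
}

int main() {
  EmitVolatileFieldStore();
  return 0;
}
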
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 9c1fbe4..d321b00 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -346,7 +346,7 @@
 
 /*
  * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform intial
+ * to a callee-save register, flush them to the frame.  Perform initial
  * assignment of promoted arguments.
  *
  * ArgLocs is an array of location records describing the incoming arguments
@@ -791,7 +791,7 @@
       }
       int outs_offset = (next_use + 1) * 4;
       if (rl_arg.wide) {
-        StoreBaseDispWide(TargetReg(kSp), outs_offset, arg_reg);
+        StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64);
         next_use += 2;
       } else {
         Store32Disp(TargetReg(kSp), outs_offset, arg_reg);
@@ -859,7 +859,7 @@
     if (loc.wide) {
       loc = UpdateLocWide(loc);
       if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
-        StoreBaseDispWide(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
       }
       next_arg += 2;
     } else {
@@ -1133,8 +1133,7 @@
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     LoadBaseIndexed(reg_ptr, reg_off, rl_result.reg, 1, kUnsignedHalf);
   } else {
-    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf,
-                        INVALID_SREG);
+    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf);
   }
   FreeTemp(reg_off);
   FreeTemp(reg_ptr);
@@ -1429,11 +1428,11 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
     if (cu_->instruction_set == kX86) {
-      LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64, INVALID_SREG);
+      LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      LoadBaseDispWide(rl_temp_offset, 0, rl_result.reg, INVALID_SREG);
+      LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64);
       FreeTemp(rl_temp_offset);
     }
   } else {
@@ -1476,11 +1475,11 @@
   if (is_long) {
     rl_value = LoadValueWide(rl_src_value, kCoreReg);
     if (cu_->instruction_set == kX86) {
-      StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64, INVALID_SREG);
+      StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      StoreBaseDispWide(rl_temp_offset, 0, rl_value.reg);
+      StoreBaseDisp(rl_temp_offset, 0, rl_value.reg, k64);
       FreeTemp(rl_temp_offset);
     }
   } else {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index e6911cd..fc6af29 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -123,7 +123,7 @@
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
-    LoadBaseDispWide(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, INVALID_SREG);
+    LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64);
   }
 }
 
@@ -258,7 +258,7 @@
     def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
@@ -320,7 +320,7 @@
     LIR *def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index baae319..b26ab57 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -748,7 +748,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 7a8376e..20fd4b1 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -32,22 +32,20 @@
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(int r_base, int displacement, int r_dest, OpSize size, int s_reg);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
@@ -173,7 +171,7 @@
 
     // TODO: collapse r_dest.
     LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                          RegStorage r_dest_hi, OpSize size, int s_reg);
+                          RegStorage r_dest_hi, OpSize size);
     // TODO: collapse r_src.
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                            RegStorage r_src_hi, OpSize size);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 1410e14..fdfe7fe 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -290,7 +290,7 @@
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   DCHECK(size == kSignedByte);
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
   StoreValue(rl_dest, rl_result);
   return true;
 }
@@ -511,7 +511,7 @@
       GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
-    LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+    LoadBaseDisp(reg_ptr, 0, rl_result.reg, size);
 
     FreeTemp(reg_ptr);
     StoreValueWide(rl_dest, rl_result);
@@ -589,7 +589,7 @@
       FreeTemp(reg_len);
     }
 
-    StoreBaseDispWide(reg_ptr, 0, rl_src.reg);
+    StoreBaseDisp(reg_ptr, 0, rl_src.reg, size);
   } else {
     rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 50b945a..8397411 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -448,7 +448,7 @@
 
 // FIXME: don't split r_dest into 2 containers.
 LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   RegStorage r_dest_hi, OpSize size, int s_reg) {
+                                   RegStorage r_dest_hi, OpSize size) {
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -546,20 +546,19 @@
 }
 
 LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                               OpSize size, int s_reg) {
+                               OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size,
-                          s_reg);
+  if (size == k64 || size == kDouble) {
+    return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size);
+  } else {
+    return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size);
+  }
 }
 
-LIR* MipsMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
-}
-
+// FIXME: don't split r_src into 2 containers.
 LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement,
                                     RegStorage r_src, RegStorage r_src_hi, OpSize size) {
   LIR *res;
@@ -647,11 +646,11 @@
   if (size == kWord) {
     size = k32;
   }
-  return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
-}
-
-LIR* MipsMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), k64);
+  if (size == k64 || size == kDouble) {
+    return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), size);
+  } else {
+    return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
+  }
 }
 
 LIR* MipsMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
@@ -665,7 +664,7 @@
 }
 
 LIR* MipsMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                       int displacement, RegStorage r_src, OpSize size, int s_reg) {
+                                       int displacement, RegStorage r_src, OpSize size) {
   LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for MIPS";
   return NULL;
 }
@@ -676,7 +675,7 @@
 }
 
 LIR* MipsMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_dest, OpSize size) {
   LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for MIPS";
   return NULL;
 }
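
A simplified sketch (hypothetical Reg/LoadBody stand-ins, not the MIPS backend types) of the dispatch the hunks above adopt: a single LoadBaseDisp/StoreBaseDisp entry point that routes k64/kDouble accesses to the low and high halves of a register pair, replacing the dedicated *Wide variants:

struct Reg {
  int low;
  int high;  // -1 when this is a single register
  Reg Low() const { return Reg{low, -1}; }
  Reg High() const { return Reg{high, -1}; }
  static Reg Invalid() { return Reg{-1, -1}; }
};

enum OpSize { k32, k64, kDouble, kWord };

// Stand-in for LoadBaseDispBody; the real worker emits LIR instead of doing nothing.
void LoadBody(int base, int disp, Reg dest, Reg dest_hi, OpSize size) {
  (void)base; (void)disp; (void)dest; (void)dest_hi; (void)size;
}

void LoadBaseDisp(int base, int disp, Reg dest, OpSize size) {
  if (size == kWord) {
    size = k32;  // the real code notes this should eventually depend on the target
  }
  if (size == k64 || size == kDouble) {
    LoadBody(base, disp, dest.Low(), dest.High(), size);  // wide: use both halves
  } else {
    LoadBody(base, disp, dest, Reg::Invalid(), size);     // narrow: single register
  }
}
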
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c9e1950..d10296f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -59,7 +59,7 @@
       RegStorage new_regs = AllocTypedTempWide(false, kAnyReg);
       reg_arg_low = new_regs.GetLow();
       reg_arg_high = new_regs.GetHigh();
-      LoadBaseDispWide(TargetReg(kSp), offset, new_regs, INVALID_SREG);
+      LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64);
     } else {
       reg_arg_high = AllocTemp();
       int offset_high = offset + sizeof(uint32_t);
@@ -112,7 +112,7 @@
       OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
     } else {
-      LoadBaseDispWide(TargetReg(kSp), offset, rl_dest.reg, INVALID_SREG);
+      LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
     }
   }
 }
@@ -126,6 +126,9 @@
   }
 
   bool wide = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE));
+  bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
+  OpSize size = LoadStoreOpSize(wide, ref);
+
   // The inliner doesn't distinguish kDouble or kFloat, use shorty.
   bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
 
@@ -134,11 +137,7 @@
   LockArg(data.object_arg);
   RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
   RegStorage reg_obj = LoadArg(data.object_arg);
-  if (wide) {
-    LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.reg, INVALID_SREG);
-  } else {
-    Load32Disp(reg_obj, data.field_offset, rl_dest.reg);
-  }
+  LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg, size);
   if (data.is_volatile) {
     // Without context sensitive analysis, we must issue the most conservative barriers.
     // In this case, either a load or store may follow so we issue both barriers.
@@ -161,6 +160,8 @@
   }
 
   bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE));
+  bool ref = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT));
+  OpSize size = LoadStoreOpSize(wide, ref);
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
@@ -172,16 +173,12 @@
     // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
-  if (wide) {
-    StoreBaseDispWide(reg_obj, data.field_offset, reg_src);
-  } else {
-    Store32Disp(reg_obj, data.field_offset, reg_src);
-  }
+  StoreBaseDisp(reg_obj, data.field_offset, reg_src, size);
   if (data.is_volatile) {
     // A load might follow the volatile store so insert a StoreLoad barrier.
     GenMemBarrier(kStoreLoad);
   }
-  if (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT)) {
+  if (ref) {
     MarkGCCard(reg_src, reg_obj);
   }
   return true;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index cb4396f..4b1de4b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -91,6 +91,7 @@
 
 // Common combo register usage patterns.
 #define REG_DEF01            (REG_DEF0 | REG_DEF1)
+#define REG_DEF012           (REG_DEF0 | REG_DEF1 | REG_DEF2)
 #define REG_DEF01_USE2       (REG_DEF0 | REG_DEF1 | REG_USE2)
 #define REG_DEF0_USE01       (REG_DEF0 | REG_USE01)
 #define REG_DEF0_USE0        (REG_DEF0 | REG_USE0)
@@ -167,6 +168,8 @@
 // Target-specific initialization.
 Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena);
 Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
@@ -783,7 +786,7 @@
                                                             bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
-    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
@@ -830,7 +833,7 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    int LoadArgRegs(CallInfo* info, int call_state,
+    virtual int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
                     uint32_t vtable_idx,
@@ -843,15 +846,15 @@
     LIR* LoadConstant(RegStorage r_dest, int value);
     // Natural word size.
     LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
+      return LoadBaseDisp(r_base, displacement, r_dest, kWord);
     }
     // Load 32 bits, regardless of target.
     LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
-      return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
+      return LoadBaseDisp(r_base, displacement, r_dest, k32);
     }
     // Load a reference at base + displacement and decompress into register.
     LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kReference, INVALID_SREG);
+      return LoadBaseDisp(r_base, displacement, r_dest, kReference);
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
@@ -975,25 +978,20 @@
     virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
     virtual LIR* CheckSuspendUsingLoad() = 0;
     virtual RegStorage LoadHelper(ThreadOffset<4> offset) = 0;
-    virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                              int s_reg) = 0;
-    virtual LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) = 0;
+    virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) = 0;
     virtual LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale, OpSize size) = 0;
     virtual LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, OpSize size,
-                                     int s_reg) = 0;
+                                     int displacement, RegStorage r_dest, OpSize size) = 0;
     virtual LIR* LoadConstantNoClobber(RegStorage r_dest, int value) = 0;
     virtual LIR* LoadConstantWide(RegStorage r_dest, int64_t value) = 0;
     virtual LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) = 0;
-    virtual LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) = 0;
     virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale, OpSize size) = 0;
     virtual LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, OpSize size,
-                                      int s_reg) = 0;
+                                      int displacement, RegStorage r_src, OpSize size) = 0;
     virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
 
     // Required for target - register utilities.
@@ -1263,6 +1261,10 @@
      */
     RegLocation ForceTempWide(RegLocation loc);
 
+    static constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
+      return wide ? k64 : ref ? kReference : k32;
+    }
+
     virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src);
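
A standalone check that the LoadStoreOpSize helper added above maps the wide/ref flags onto the intended access sizes (the three-value enum is a reduced stand-in for ART's OpSize):

enum OpSize { k32, k64, kReference };  // reduced stand-in for ART's OpSize

constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
  return wide ? k64 : ref ? kReference : k32;
}

static_assert(LoadStoreOpSize(true, false) == k64, "wide values use 64-bit accesses");
static_assert(LoadStoreOpSize(false, true) == kReference, "object references use reference accesses");
static_assert(LoadStoreOpSize(false, false) == k32, "everything else uses 32-bit accesses");
// A Dalvik value is never both wide and a reference, so wide taking precedence is harmless.
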
 
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index a39611e..76553af 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -634,14 +634,14 @@
         info1 = info2;
       }
       int v_reg = mir_graph_->SRegToVReg(info1->SReg());
-      StoreBaseDispWide(TargetReg(kSp), VRegOffset(v_reg), reg);
+      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
     }
   } else {
     RegisterInfo* info = GetRegInfo(reg);
     if (info->IsLive() && info->IsDirty()) {
       info->SetIsDirty(false);
       int v_reg = mir_graph_->SRegToVReg(info->SReg());
-      StoreBaseDispWide(TargetReg(kSp), VRegOffset(v_reg), reg);
+      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
     }
   }
 }
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 58e2f42..b8481e2 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -25,7 +25,7 @@
 const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
   { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data",  "0x!0d" },
   { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" },
-  { kX86Nop,       kNop,     IS_UNARY_OP,            { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
+  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
 
 #define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \
                      rm8_r8, rm32_r32, \
@@ -175,12 +175,14 @@
   { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
 
-  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
 
   { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
   { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
 
+  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
+
 #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
 { kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
 { kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
@@ -213,8 +215,10 @@
 #undef SHIFT_ENCODING_MAP
 
   { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
-  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
-  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
+  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32RRI", "!0r,!1r,!2d" },
+  { kX86Shld32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" },
+  { kX86Shrd32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
 
   { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
   { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -233,15 +237,15 @@
                            arr, arr_kind, arr_flags, imm, \
                            b_flags, hw_flags, w_flags, \
                            b_format, hw_format, w_format) \
-{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, #b_format "!0r" }, \
-{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, #b_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, #b_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, #hw_format "!0r" }, \
-{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, #hw_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, #hw_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, #w_format "!0r" }, \
-{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, #w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, #w_format "[!0r+!1r<<!2d+!3d]" }
+{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, b_format "!0r" }, \
+{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, b_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, hw_format "!0r" }, \
+{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
 
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
@@ -258,9 +262,9 @@
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
 
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
   EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
   { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
@@ -276,23 +280,23 @@
   EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF2, 0x5A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF3, 0x5A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0_USE0),
 
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
@@ -322,7 +326,7 @@
   { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
-  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE01,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
+  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
   { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
@@ -334,8 +338,8 @@
   // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
   { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
 
-  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_DEF0 | SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
 
   { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
   { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
@@ -369,7 +373,7 @@
   { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
   { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
   { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr",      "!0r,!1d" },
-  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | SETS_CCODES,         { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
+  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
 };
 
 static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
@@ -425,6 +429,8 @@
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+    case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg, 3: immediate
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
@@ -489,6 +495,8 @@
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: cond
       return ComputeSize(entry, 0, 0, false);
+    case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3: cond
+      return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -729,6 +737,14 @@
   EmitRegArray(entry, reg, base, index, scale, disp);
 }
 
+void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
+                              int disp, int32_t imm) {
+  EmitPrefixAndOpcode(entry);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
 void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
@@ -788,6 +804,11 @@
   EmitImm(entry, imm);
 }
 
+void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry,
+                               uint8_t base, int disp, uint8_t reg, int32_t imm) {
+  EmitRegMemImm(entry, reg, base, disp, imm);
+}
+
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -889,6 +910,26 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base,
+                                int displacement, int imm) {
+  EmitPrefix(entry);
+  if (imm != 1) {
+    code_buffer_.push_back(entry->skeleton.opcode);
+  } else {
+    // Shorter encoding for a 1-bit shift.
+    code_buffer_.push_back(entry->skeleton.ax_opcode);
+  }
+  DCHECK_NE(0x0F, entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  if (imm != 1) {
+    DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
+    DCHECK(IS_SIMM8(imm));
+    code_buffer_.push_back(imm & 0xFF);
+  }
+}
+
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -910,6 +951,25 @@
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
+void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) {
+  if (entry->skeleton.prefix1 != 0) {
+    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      code_buffer_.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0x0F, entry->skeleton.opcode);
+  code_buffer_.push_back(0x0F);
+  DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
+  code_buffer_.push_back(0x90 | condition);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
+}
+
 void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2,
                                 uint8_t condition) {
   // Generate prefix and opcode without the condition
@@ -935,6 +995,24 @@
   code_buffer_.push_back(modrm);
 }
 
+void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) {
+  // Generate prefix and opcode without the condition
+  EmitPrefixAndOpcode(entry);
+
+  // Now add the condition. The last byte of the opcode is the one that receives it.
+  DCHECK_LE(condition, 0xF);
+  code_buffer_.back() += condition;
+
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+
+  // Check that registers requested for encoding are sane.
+  DCHECK_LT(reg1, 8);
+  DCHECK_LT(base, 8);
+
+  EmitModrmDisp(reg1, base, displacement);
+}
+
 void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
   if (entry->opcode == kX86Jmp8) {
     DCHECK(IS_SIMM8(rel));
@@ -1254,6 +1332,10 @@
       case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
         EmitMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: immediate
+        EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                     lir->operands[3], lir->operands[4]);
+        break;
       case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
         EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
@@ -1277,6 +1359,10 @@
       case kRegRegImmRev:
         EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kMemRegImm:
+        EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                      lir->operands[3]);
+        break;
       case kRegRegImm:
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
@@ -1296,6 +1382,9 @@
       case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
         EmitShiftRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
+        EmitShiftMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kShiftRegCl:  // lir operands - 0: reg, 1: cl
         EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1305,9 +1394,15 @@
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kMemCond:  // lir operands - 0: base, 1: displacement, 2: condition
+        EmitMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: condition
         EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: displacement, 3: condition
+        EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
+        break;
       case kJmp:  // lir operands - 0: rel
         if (entry->opcode == kX86JmpT) {
           // This works since the instruction format for jmp and call is basically the same and
@@ -1388,7 +1483,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
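
The EXT_0F_ENCODING_MAP changes above add REG_USE0 (or switch REG_DEF0 to REG_DEF0_USE0) for the two-operand SSE arithmetic entries because an instruction like addsd xmm0, xmm1 reads xmm0 as well as writing it; without the use bit, liveness analysis could treat the destination's old value as dead. A tiny sketch of how such def/use masks compose (the flag values here are illustrative, not ART's actual encoding):

#include <cstdint>

constexpr uint64_t REG_DEF0 = 1u << 0;  // instruction defines operand 0
constexpr uint64_t REG_USE0 = 1u << 1;  // instruction uses operand 0
constexpr uint64_t REG_USE1 = 1u << 2;  // instruction uses operand 1
constexpr uint64_t REG_DEF0_USE0 = REG_DEF0 | REG_USE0;

// addsd xmm_dst, xmm_src: dst = dst + src, so dst is both defined and used.
constexpr uint64_t kAddsdFlags = REG_DEF0_USE0 | REG_USE1;

static_assert((kAddsdFlags & REG_USE0) != 0,
              "the destination's previous value is an input and must stay live");
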
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index c3ea55f..a03e5f2 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -32,21 +32,20 @@
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
@@ -331,19 +330,24 @@
                       int scale, int disp);
     void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
                       uint8_t reg);
+    void EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
+                      int32_t imm);
     void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
     void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
     void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
     void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
     void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp,
                        int32_t imm);
+    void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm);
     void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
+    void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm);
     void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
+    void EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition);
 
     /**
      * @brief Used for encoding conditional register to register operation.
@@ -354,6 +358,16 @@
      */
     void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition);
 
+    /**
+     * @brief Used for encoding conditional register to memory operation.
+     * @param entry The entry in the encoding map for the opcode.
+     * @param reg1 The first physical register.
+     * @param base The memory base register.
+     * @param displacement The memory displacement.
+     * @param condition The condition code for operation.
+     */
+    void EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition);
+
     void EmitJmp(const X86EncodingMap* entry, int rel);
     void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
     void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 1ed0b63..d1c2e70 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -149,7 +149,7 @@
     } else {
       // It must have been register promoted if it is not a temp but is still in physical
       // register. Since we need it to be in memory to convert, we place it there now.
-      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.reg);
+      StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64);
     }
   }
 
@@ -183,7 +183,7 @@
     if (is_double) {
       rl_result = EvalLocWide(rl_dest, kFPReg, true);
 
-      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, INVALID_SREG);
+      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
 
       StoreFinalValueWide(rl_dest, rl_result);
     } else {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4446f43..ce5766f 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -142,8 +142,10 @@
     } else {
       if (src_fp) {
         NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
-        NewLIR2(kX86PsrlqRI, r_src.GetReg(), 32);
-        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetReg());
+        RegStorage temp_reg = AllocTempDouble();
+        NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
+        NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
+        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
       } else {
         DCHECK(r_dest.IsPair());
         DCHECK(r_src.IsPair());
@@ -325,49 +327,60 @@
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
+  if (is_equality_test && val != 0) {
+    rl_src1 = ForceTempWide(rl_src1);
+  }
   RegStorage low_reg = rl_src1.reg.GetLow();
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
+  if (is_equality_test) {
+    // We can simplify the comparison for ==, != against 0.
+    if (val == 0) {
+      if (IsTemp(low_reg)) {
+        OpRegReg(kOpOr, low_reg, high_reg);
+        // We have now changed it; ignore the old values.
+        Clobber(rl_src1.reg);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+        FreeTemp(t_reg);
+      }
+      OpCondBranch(ccode, taken);
+      return;
+    }
+
+    // Need to compute the actual value for ==, !=.
+    OpRegImm(kOpSub, low_reg, val_lo);
+    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+    OpRegReg(kOpOr, high_reg, low_reg);
+    Clobber(rl_src1.reg);
+  } else if (ccode == kCondLe || ccode == kCondGt) {
+    // Swap operands and condition code to prevent use of zero flag.
+    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+    LoadConstantWide(tmp, val);
+    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
+    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
+    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
+    FreeTemp(tmp);
+  } else {
+    // We can use a compare for the low word to set CF.
+    OpRegImm(kOpCmp, low_reg, val_lo);
+    if (IsTemp(high_reg)) {
+      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+      // We have now changed it; ignore the old values.
+      Clobber(rl_src1.reg);
+    } else {
+      // mov temp_reg, high_reg; sbb temp_reg, high_constant
+      RegStorage t_reg = AllocTemp();
+      OpRegCopy(t_reg, high_reg);
+      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
+      FreeTemp(t_reg);
+    }
   }
 
-  OpRegImm(kOpCmp, high_reg, val_hi);
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+  OpCondBranch(ccode, taken);
 }
 
 void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
@@ -677,14 +690,12 @@
   RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  // Unaligned access is allowed on x86.
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
   if (size == k64) {
-    // Unaligned access is allowed on x86.
-    LoadBaseDispWide(rl_address.reg, 0, rl_result.reg, INVALID_SREG);
     StoreValueWide(rl_dest, rl_result);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned access is allowed on x86.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
   }
   return true;
@@ -698,7 +709,7 @@
   if (size == k64) {
     // Unaligned access is allowed on x86.
     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDispWide(rl_address.reg, 0, rl_value.reg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned access is allowed on x86.
@@ -996,7 +1007,7 @@
       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
       break;
     case 1:
-      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, sreg);
+      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32);
       break;
     default:
       m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
@@ -1100,8 +1111,7 @@
   if (src1_in_reg) {
     NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
   } else {
-    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1,
-                 k32, GetSRegHi(rl_src1.s_reg_low));
+    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32);
   }
 
   if (is_square) {
@@ -1124,8 +1134,7 @@
     if (src2_in_reg) {
       NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
     } else {
-      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0,
-                   k32, GetSRegHi(rl_src2.s_reg_low));
+      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32);
     }
 
     // EAX <- EAX * 1L  (2H * 1L)
@@ -1158,8 +1167,7 @@
   if (src2_in_reg) {
     NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
   } else {
-    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0,
-                 k32, rl_src2.s_reg_low);
+    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32);
   }
 
   // EDX:EAX <- 2L * 1L (double precision)
@@ -1408,8 +1416,7 @@
     }
   }
   rl_result = EvalLoc(rl_dest, reg_class, true);
-  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size,
-                      INVALID_SREG);
+  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
   if ((size == k64) || (size == kDouble)) {
     StoreValueWide(rl_dest, rl_result);
   } else {
@@ -1466,10 +1473,9 @@
       rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
     RegStorage temp = AllocTemp();
     OpRegCopy(temp, rl_src.reg);
-    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, INVALID_SREG);
+    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
   } else {
-    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size,
-                         INVALID_SREG);
+    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
   }
   if (card_mark) {
     // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
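
The rewritten GenFusedLongCmpImmBranch above leans on the x86 borrow chain: compare the low words to set the carry flag, fold the borrow into the high words with sbb, and branch on a condition that does not consult the zero flag; for kCondLe/kCondGt the operands are swapped so the same kCondGe/kCondLt forms can be reused. A minimal host-side sketch of the kCondLt case, in plain C++ rather than LIR (the helper name and the 64-bit intermediate are illustrative only; the emitted code branches on SF/OF after the sbb):

  #include <cassert>
  #include <cstdint>

  // Models the cmp/sbb pair for the signed "<" case: the low-word compare
  // produces an unsigned borrow, and the high-word subtract-with-borrow then
  // decides the whole 64-bit comparison. A wider intermediate stands in for
  // the SF/OF flag logic that the emitted conditional branch would use.
  static bool LessThanViaBorrow(int64_t lhs, int64_t rhs) {
    uint32_t lhs_lo = static_cast<uint32_t>(lhs);
    int32_t lhs_hi = static_cast<int32_t>(lhs >> 32);
    uint32_t rhs_lo = static_cast<uint32_t>(rhs);
    int32_t rhs_hi = static_cast<int32_t>(rhs >> 32);

    bool borrow = lhs_lo < rhs_lo;  // cmp low_reg, val_lo: CF records the borrow.
    int64_t high_diff =
        static_cast<int64_t>(lhs_hi) - rhs_hi - (borrow ? 1 : 0);  // sbb high_reg, val_hi
    return high_diff < 0;  // branch taken for kCondLt.
  }

  int main() {
    assert(LessThanViaBorrow(-1, 0));
    assert(!LessThanViaBorrow(INT64_C(0x100000000), INT64_C(0xFFFFFFFF)));
    assert(LessThanViaBorrow(INT64_MIN, INT64_MAX));
    return 0;
  }
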
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index da6ded5..8423ec4 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -520,7 +520,7 @@
         // 4 byte offset.  We will fix this up in the assembler later to have the right
         // value.
         res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::Solo64(low_reg_val),
-                           kDouble, INVALID_SREG);
+                           kDouble);
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
         SetMemRefType(res, true, kLiteral);
@@ -546,7 +546,7 @@
 }
 
 LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+                                     int displacement, RegStorage r_dest, OpSize size) {
   LIR *load = NULL;
   LIR *load2 = NULL;
   bool is_array = r_index.Valid();
@@ -663,26 +663,21 @@
 /* Load value from base + scaled index. */
 LIR* X86Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale, OpSize size) {
-  return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size, INVALID_SREG);
+  return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size);
 }
 
 LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size, int s_reg) {
+                              OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
   return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
-                             size, s_reg);
-}
-
-LIR* X86Mir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) {
-  return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest, k64, s_reg);
+                             size);
 }
 
 LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_src, OpSize size) {
   LIR *store = NULL;
   LIR *store2 = NULL;
   bool is_array = r_index.Valid();
@@ -757,7 +752,7 @@
 /* store value base base + scaled index. */
 LIR* X86Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                       int scale, OpSize size) {
-  return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size, INVALID_SREG);
+  return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size);
 }
 
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
@@ -766,13 +761,7 @@
   if (size == kWord) {
     size = k32;
   }
-  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size,
-                              INVALID_SREG);
-}
-
-LIR* X86Mir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                              r_src, k64, INVALID_SREG);
+  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 9bf49c3..77d716f 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -343,6 +343,10 @@
   // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
   //             - lir operands - 0: reg1, 1: reg2, 2: CC
   kX86Cmov32RRC,
+  // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp]
+  //             - lir operands - 0: reg1, 1: base, 2: disp, 3: CC
+  kX86Cmov32RMC,
+
   // RC - Register CL - opcode reg, CL
   //          - lir operands - 0: reg, 1: CL
   // MC - Memory CL   - opcode [base + disp], CL
@@ -366,7 +370,9 @@
 #undef BinaryShiftOpcode
   kX86Cmc,
   kX86Shld32RRI,
+  kX86Shld32MRI,
   kX86Shrd32RRI,
+  kX86Shrd32MRI,
 #define UnaryOpcode(opcode, reg, mem, array) \
   opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
   opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -481,11 +487,13 @@
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
   kRegRegImmRev,                           // RRI with first reg in r/m
+  kMemRegImm,                              // MRI instruction kinds.
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
   kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
   kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
   kRegRegCond,                             // RR instruction kind followed by a condition.
+  kRegMemCond,                             // RM instruction kind followed by a condition.
   kJmp, kJcc, kCall,                       // Branch instruction kinds.
   kPcRel,                                  // Operation with displacement that is PC relative
   kMacro,                                  // An instruction composing multiple others
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e5decc5..6817f14 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -336,7 +336,7 @@
     : profile_ok_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
-      compiler_(Compiler::Create(compiler_kind)),
+      compiler_(Compiler::Create(this, compiler_kind)),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
       freezing_constructor_lock_("freezing constructor lock"),
@@ -374,7 +374,7 @@
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
 
-  compiler_->Init(*this);
+  compiler_->Init();
 
   CHECK(!Runtime::Current()->IsStarted());
   if (!image_) {
@@ -433,7 +433,7 @@
     STLDeleteElements(&classes_to_patch_);
   }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
-  compiler_->UnInit(*this);
+  compiler_->UnInit();
 }
 
 CompilerTls* CompilerDriver::GetTls() {
@@ -1874,7 +1874,7 @@
         (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) {
       // Leaving this empty will trigger the generic JNI version
     } else {
-      compiled_method = compiler_->JniCompile(*this, access_flags, method_idx, dex_file);
+      compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
       CHECK(compiled_method != NULL);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
@@ -1883,9 +1883,8 @@
     bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return NULL.
-      compiled_method = compiler_->Compile(
-          *this, code_item, access_flags, invoke_type, class_def_idx,
-          method_idx, class_loader, dex_file);
+      compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx,
+                                           method_idx, class_loader, dex_file);
     }
     if (compiled_method == nullptr && dex_to_dex_compilation_level != kDontDexToDexCompile) {
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
@@ -1983,7 +1982,7 @@
                               OatWriter* oat_writer,
                               art::File* file)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host, *this);
+  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
 }
 void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set,
                                                 std::string* target_triple,
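
The CompilerDriver changes in this file follow the same pattern applied to CompiledMethod and to the JNI and LLVM entry points below: the driver is passed to Compiler::Create() once and kept as a back pointer, so Init(), UnInit(), Compile(), JniCompile() and WriteElf() no longer take a CompilerDriver& on every call. A stripped-down sketch of that wiring, with both classes reduced to stubs (only the ownership shape is meant to match the diff):

  // Stub types standing in for the real classes in compiler_driver.h and
  // compiler.h; the point is the constructor-time back pointer.
  struct CompilerDriver {};

  class Compiler {
   public:
    explicit Compiler(CompilerDriver* driver) : driver_(driver) {}
    CompilerDriver* GetCompilerDriver() const { return driver_; }
    void Init() { /* reads driver_ instead of taking a CompilerDriver& argument */ }

   private:
    CompilerDriver* const driver_;
  };

  int main() {
    CompilerDriver driver;
    Compiler compiler(&driver);  // mirrors Compiler::Create(this, compiler_kind)
    compiler.Init();             // mirrors compiler_->Init() replacing Init(*this)
    return compiler.GetCompilerDriver() == &driver ? 0 : 1;
  }
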
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 9f439eb..5a22170 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -52,7 +52,7 @@
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler,
+CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
                                             uint32_t access_flags, uint32_t method_idx,
                                             const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
@@ -60,7 +60,7 @@
   const bool is_static = (access_flags & kAccStatic) != 0;
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-  InstructionSet instruction_set = compiler.GetInstructionSet();
+  InstructionSet instruction_set = driver->GetInstructionSet();
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
   }
@@ -423,7 +423,7 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  return new CompiledMethod(compiler,
+  return new CompiledMethod(driver,
                             instruction_set,
                             managed_code,
                             frame_size,
@@ -536,7 +536,7 @@
 
 }  // namespace art
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& compiler,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* compiler,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file) {
   return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 2812700..df895ee 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -175,8 +175,8 @@
 }  // namespace llvm
 }  // namespace art
 
-static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver& driver) {
-  void *compiler_context = driver.GetCompilerContext();
+static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver* driver) {
+  void *compiler_context = driver->GetCompilerContext();
   CHECK(compiler_context != NULL);
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
@@ -187,20 +187,20 @@
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 
-  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(&driver,
-                                                                       driver.GetInstructionSet());
+  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(driver,
+                                                                       driver->GetInstructionSet());
 
-  driver.SetCompilerContext(compiler_llvm);
+  driver->SetCompilerContext(compiler_llvm);
 }
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver) {
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver) {
   delete ContextOf(driver);
-  driver.SetCompilerContext(NULL);
+  driver->SetCompilerContext(nullptr);
 }
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -213,13 +213,13 @@
 
   art::DexCompilationUnit dex_compilation_unit(
     NULL, class_loader, class_linker, dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags, driver->GetVerifiedMethod(&dex_file, method_idx));
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileDexMethod(&dex_compilation_unit, invoke_type);
   return result;
 }
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file) {
   art::ClassLinker *class_linker = art::Runtime::Current()->GetClassLinker();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8b85d71..bbebd3a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -30,12 +30,12 @@
 namespace art {
 
 void CodeGenerator::Compile(CodeAllocator* allocator) {
-  const GrowableArray<HBasicBlock*>* blocks = GetGraph()->GetBlocks();
-  DCHECK(blocks->Get(0) == GetGraph()->GetEntryBlock());
-  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks->Get(1)));
+  const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
+  DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock());
+  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1)));
   GenerateFrameEntry();
-  for (size_t i = 0; i < blocks->Size(); i++) {
-    CompileBlock(blocks->Get(i));
+  for (size_t i = 0, e = blocks.Size(); i < e; ++i) {
+    CompileBlock(blocks.Get(i));
   }
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 74cbccc..aafd801 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -354,7 +354,7 @@
         pc_infos_(graph->GetArena(), 32),
         blocked_registers_(static_cast<bool*>(
             graph->GetArena()->Alloc(number_of_registers * sizeof(bool), kArenaAllocData))) {
-    block_labels_.SetSize(graph->GetBlocks()->Size());
+    block_labels_.SetSize(graph->GetBlocks().Size());
   }
   ~CodeGenerator() { }
 
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 1c30b79..0417050 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -32,13 +32,13 @@
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
   graph->BuildDominatorTree();
-  ASSERT_EQ(graph->GetBlocks()->Size(), blocks_length);
-  for (size_t i = 0; i < blocks_length; i++) {
+  ASSERT_EQ(graph->GetBlocks().Size(), blocks_length);
+  for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == -1) {
-      ASSERT_EQ(nullptr, graph->GetBlocks()->Get(i)->GetDominator());
+      ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
     } else {
-      ASSERT_NE(nullptr, graph->GetBlocks()->Get(i)->GetDominator());
-      ASSERT_EQ(blocks[i], graph->GetBlocks()->Get(i)->GetDominator()->GetBlockId());
+      ASSERT_NE(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+      ASSERT_EQ(blocks[i], graph->GetBlocks().Get(i)->GetDominator()->GetBlockId());
     }
   }
 }
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
new file mode 100644
index 0000000..aa4d35e
--- /dev/null
+++ b/compiler/optimizing/liveness_test.cc
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data, const char* expected) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraphBuilder builder(&allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  ASSERT_NE(graph, nullptr);
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  std::ostringstream buffer;
+  for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    buffer << "Block " << block->GetBlockId() << std::endl;
+    BitVector* live_in = liveness.GetLiveInSet(*block);
+    live_in->Dump(buffer, "  live in: ");
+    BitVector* live_out = liveness.GetLiveOutSet(*block);
+    live_out->Dump(buffer, "  live out: ");
+    BitVector* kill = liveness.GetKillSet(*block);
+    kill->Dump(buffer, "  kill: ");
+  }
+  ASSERT_STREQ(expected, buffer.str().c_str());
+}
+
+TEST(LivenessTest, CFG1) {
+  const char* expected =
+    "Block 0\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n"
+    "Block 1\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n"
+    "Block 2\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n";
+
+  // Constant is not used.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG2) {
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0)\n"
+    "  live out: (1)\n"
+    "  kill: (1)\n"
+    "Block 1\n"
+    "  live in: (1)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n"
+    "Block 2\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG3) {
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
+    "Block 1\n"  // block with add
+    "  live in: (110)\n"
+    "  live out: (001)\n"
+    "  kill: (001)\n"
+    "Block 2\n"  // block with return
+    "  live in: (001)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
+    "Block 3\n"  // exit block
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 3 << 12 | 0,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::ADD_INT_2ADDR | 1 << 12,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG4) {
+  // var a;
+  // if (0 == 0) {
+  //   a = 5;
+  // } else {
+  //   a = 4;
+  // }
+  // return a;
+  //
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi, equal test)
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
+    "Block 1\n"  // block with if
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00010)\n"
+    "Block 2\n"  // else block
+    "  live in: (01000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 3\n"  // then block
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 4\n"  // return block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00001)\n"
+    "Block 5\n"  // exit block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG5) {
+  // var a = 0;
+  // if (0 == 0) {
+  // } else {
+  //   a = 4;
+  // }
+  // return a;
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"  // block with if
+    "  live in: (1100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0010)\n"
+    "Block 2\n"  // else block
+    "  live in: (0100)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 3\n"  // return block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0001)\n"
+    "Block 4\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop1) {
+  // Simple loop with one preheader and one back edge.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return;
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"  // pre header
+    "  live in: (1100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // return block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop3) {
+  // Test that the returned value stays live in a preceding loop.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return 5;
+  const char* expected =
+    "Block 0\n"
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
+    "Block 1\n"
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
+    "Block 4\n"  // return block
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 5\n"  // exit block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::CONST_4 | 5 << 12 | 1 << 8,
+    Instruction::RETURN | 1 << 8);
+
+  TestCode(data, expected);
+}
+
+
+TEST(LivenessTest, Loop4) {
+  // Make sure we support a preheader of a loop not being the first predecessor
+  // in the predecessor list of the header.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return a;
+  // Bitsets are made of:
+  // (constant0, constant4, phi, equal test)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"
+    "  live in: (1100)\n"
+    "  live out: (1100)\n"
+    "  kill: (0000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0100)\n"
+    "  live out: (0110)\n"
+    "  kill: (0011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // pre loop header
+    "  live in: (1100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // return block
+    "  live in: (0010)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 6\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x500,
+    Instruction::IF_EQ, 5,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::GOTO | 0xFC00,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop5) {
+  // Make sure we create a preheader of a loop when a header originally has two
+  // incoming blocks and one back edge.
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, equal in block 1, phi in block 8, phi in block 4,
+  //  equal in block 4)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000000)\n"
+    "  live out: (1110000)\n"
+    "  kill: (1110000)\n"
+    "Block 1\n"
+    "  live in: (1110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0001000)\n"
+    "Block 2\n"
+    "  live in: (0100000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 3\n"
+    "  live in: (0010000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 4\n"  // loop header
+    "  live in: (0000000)\n"
+    "  live out: (0000010)\n"
+    "  kill: (0000011)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0000010)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (0000010)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 7\n"  // exit block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 8\n"  // synthesized pre header
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000100)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop6) {
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (000000)\n"
+    "  live out: (111000)\n"
+    "  kill: (111000)\n"
+    "Block 1\n"
+    "  live in: (111000)\n"
+    "  live out: (011000)\n"
+    "  kill: (000000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (011000)\n"
+    "  live out: (011100)\n"
+    "  kill: (000110)\n"
+    "Block 3\n"
+    "  live in: (011000)\n"
+    "  live out: (011000)\n"
+    "  kill: (000001)\n"
+    "Block 4\n"  // back edge
+    "  live in: (011000)\n"
+    "  live out: (011000)\n"
+    "  kill: (000000)\n"
+    "Block 5\n"  // back edge
+    "  live in: (011000)\n"
+    "  live out: (011000)\n"
+    "  kill: (000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (000100)\n"
+    "  live out: (000000)\n"
+    "  kill: (000000)\n"
+    "Block 7\n"  // exit block
+    "  live in: (000000)\n"
+    "  live out: (000000)\n"
+    "  kill: (000000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 8,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::GOTO | 0xFA00,
+    Instruction::GOTO | 0xF900,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+
+TEST(LivenessTest, Loop7) {
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3,
+  //  phi in block 6)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000000)\n"
+    "  live out: (1110000)\n"
+    "  kill: (1110000)\n"
+    "Block 1\n"
+    "  live in: (1110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0001100)\n"
+    "Block 3\n"
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000010)\n"
+    "Block 4\n"  // loop exit
+    "  live in: (0010000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000001)\n"
+    "Block 7\n"  // exit block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 8,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::GOTO | 0x0200,
+    Instruction::GOTO | 0xF900,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+}  // namespace art
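
The expected strings above are the live_in, live_out and kill bitsets that the SsaLivenessAnalysis added below computes by backward dataflow: live_out of a block is the union of live_in over its successors, and live_in is the block's uses plus whatever flows through without being killed, iterated to a fixed point so back edges are accounted for. A small self-contained model of those equations; the two-block CFG and its gen/kill sets are invented for illustration and do not correspond to any test above:

  #include <bitset>
  #include <cstddef>
  #include <iostream>
  #include <vector>

  int main() {
    constexpr size_t kNumValues = 2;  // two SSA values: v0 and v1
    struct Block {
      std::vector<size_t> successors;
      std::bitset<kNumValues> gen, kill, live_in, live_out;
    };

    // Block 0 defines v0 and falls through to block 1, which uses v0,
    // defines v1, and loops back to itself.
    std::vector<Block> blocks(2);
    blocks[0].successors = {1};
    blocks[0].kill = 0x1;
    blocks[1].successors = {1};
    blocks[1].gen = 0x1;
    blocks[1].kill = 0x2;

    // Fixed point of:  live_out(B) = union of live_in(S) for S in succ(B)
    //                  live_in(B)  = gen(B) | (live_out(B) & ~kill(B))
    bool changed = true;
    while (changed) {
      changed = false;
      for (size_t i = blocks.size(); i > 0; --i) {  // post-order style sweep
        Block& b = blocks[i - 1];
        std::bitset<kNumValues> out;
        for (size_t s : b.successors) out |= blocks[s].live_in;
        std::bitset<kNumValues> in = b.gen | (out & ~b.kill);
        if (out != b.live_out || in != b.live_in) {
          b.live_out = out;
          b.live_in = in;
          changed = true;
        }
      }
    }

    for (size_t i = 0; i < blocks.size(); ++i) {
      std::cout << "Block " << i << "  live in: " << blocks[i].live_in
                << "  live out: " << blocks[i].live_out << std::endl;
    }
    return 0;
  }
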
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3d6aeb7..d153bf7 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -25,7 +25,7 @@
   blocks_.Add(block);
 }
 
-void HGraph::FindBackEdges(ArenaBitVector* visited) const {
+void HGraph::FindBackEdges(ArenaBitVector* visited) {
   ArenaBitVector visiting(arena_, blocks_.Size(), false);
   VisitBlockForBackEdges(entry_block_, visited, &visiting);
 }
@@ -49,7 +49,7 @@
 
 void HGraph::VisitBlockForBackEdges(HBasicBlock* block,
                                     ArenaBitVector* visited,
-                                    ArenaBitVector* visiting) const {
+                                    ArenaBitVector* visiting) {
   int id = block->GetBlockId();
   if (visited->IsBitSet(id)) return;
 
@@ -63,6 +63,7 @@
       VisitBlockForBackEdges(successor, visited, visiting);
     }
   }
+  post_order_.Add(block);
   visiting->ClearBit(id);
 }
 
@@ -82,7 +83,6 @@
   //     have been processed.
   GrowableArray<size_t> visits(arena_, blocks_.Size());
   visits.SetSize(blocks_.Size());
-  dominator_order_.Add(entry_block_);
   for (size_t i = 0; i < entry_block_->GetSuccessors()->Size(); i++) {
     VisitBlockForDominatorTree(entry_block_->GetSuccessors()->Get(i), entry_block_, &visits);
   }
@@ -120,7 +120,6 @@
   // dominator of the block. We can then start visiting its successors.
   if (visits->Get(block->GetBlockId()) ==
       block->GetPredecessors()->Size() - block->NumberOfBackEdges()) {
-    dominator_order_.Add(block);
     for (size_t i = 0; i < block->GetSuccessors()->Size(); i++) {
       VisitBlockForDominatorTree(block->GetSuccessors()->Get(i), block, visits);
     }
@@ -128,15 +127,15 @@
 }
 
 void HGraph::TransformToSSA() {
-  DCHECK(!dominator_order_.IsEmpty());
+  DCHECK(!post_order_.IsEmpty());
   SimplifyCFG();
   SsaBuilder ssa_builder(this);
   ssa_builder.BuildSsa();
 }
 
 void HGraph::SimplifyCFG() {
-  for (size_t i = 0; i < dominator_order_.Size(); i++) {
-    HBasicBlock* current = dominator_order_.Get(i);
+  for (size_t i = post_order_.Size(); i > 0; --i) {
+    HBasicBlock* current = post_order_.Get(i - 1);
     if (current->IsLoopHeader()) {
       // Make sure the loop has only one pre header. This simplifies SSA building by having
       // to just look at the pre header to know which locals are initialized at entry of the
@@ -149,10 +148,9 @@
         pre_header->AddInstruction(new (arena_) HGoto());
         pre_header->SetDominator(current->GetDominator());
         current->SetDominator(pre_header);
-        dominator_order_.InsertAt(i, pre_header);
-        i++;
+        post_order_.InsertAt(i, pre_header);
 
-        ArenaBitVector back_edges(arena_, GetBlocks()->Size(), false);
+        ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
         for (size_t pred = 0; pred < info->GetBackEdges()->Size(); pred++) {
           back_edges.SetBit(info->GetBackEdges()->Get(pred)->GetBlockId());
         }
@@ -298,9 +296,9 @@
 #undef DEFINE_ACCEPT
 
 void HGraphVisitor::VisitInsertionOrder() {
-  const GrowableArray<HBasicBlock*>* blocks = graph_->GetBlocks();
-  for (size_t i = 0 ; i < blocks->Size(); i++) {
-    VisitBasicBlock(blocks->Get(i));
+  const GrowableArray<HBasicBlock*>& blocks = graph_->GetBlocks();
+  for (size_t i = 0 ; i < blocks.Size(); i++) {
+    VisitBasicBlock(blocks.Get(i));
   }
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 581c1d5..bd3d703 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -49,6 +49,7 @@
 
   friend class HBasicBlock;
   friend class HInstructionIterator;
+  friend class HBackwardInstructionIterator;
 
   DISALLOW_COPY_AND_ASSIGN(HInstructionList);
 };
@@ -59,14 +60,14 @@
   explicit HGraph(ArenaAllocator* arena)
       : arena_(arena),
         blocks_(arena, kDefaultNumberOfBlocks),
-        dominator_order_(arena, kDefaultNumberOfBlocks),
+        post_order_(arena, kDefaultNumberOfBlocks),
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
         current_instruction_id_(0) { }
 
   ArenaAllocator* GetArena() const { return arena_; }
-  const GrowableArray<HBasicBlock*>* GetBlocks() const { return &blocks_; }
+  const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
@@ -108,8 +109,8 @@
     return number_of_in_vregs_;
   }
 
-  GrowableArray<HBasicBlock*>* GetDominatorOrder() {
-    return &dominator_order_;
+  const GrowableArray<HBasicBlock*>& GetPostOrder() const {
+    return post_order_;
   }
 
  private:
@@ -117,10 +118,10 @@
   void VisitBlockForDominatorTree(HBasicBlock* block,
                                   HBasicBlock* predecessor,
                                   GrowableArray<size_t>* visits);
-  void FindBackEdges(ArenaBitVector* visited) const;
+  void FindBackEdges(ArenaBitVector* visited);
   void VisitBlockForBackEdges(HBasicBlock* block,
                               ArenaBitVector* visited,
-                              ArenaBitVector* visiting) const;
+                              ArenaBitVector* visiting);
   void RemoveDeadBlocks(const ArenaBitVector& visited) const;
 
   ArenaAllocator* const arena_;
@@ -128,8 +129,8 @@
   // List of blocks in insertion order.
   GrowableArray<HBasicBlock*> blocks_;
 
-  // List of blocks to perform a pre-order dominator tree traversal.
-  GrowableArray<HBasicBlock*> dominator_order_;
+  // List of blocks to perform a post order tree traversal.
+  GrowableArray<HBasicBlock*> post_order_;
 
   HBasicBlock* entry_block_;
   HBasicBlock* exit_block_;
@@ -322,6 +323,7 @@
         next_(nullptr),
         block_(nullptr),
         id_(-1),
+        ssa_index_(-1),
         uses_(nullptr),
         env_uses_(nullptr),
         environment_(nullptr),
@@ -360,11 +362,17 @@
   HUseListNode<HInstruction>* GetUses() const { return uses_; }
   HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; }
 
-  bool HasUses() const { return uses_ != nullptr; }
+  bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; }
 
   int GetId() const { return id_; }
   void SetId(int id) { id_ = id; }
 
+  int GetSsaIndex() const { return ssa_index_; }
+  void SetSsaIndex(int ssa_index) { ssa_index_ = ssa_index; }
+  bool HasSsaIndex() const { return ssa_index_ != -1; }
+
+  bool HasEnvironment() const { return environment_ != nullptr; }
+  HEnvironment* GetEnvironment() const { return environment_; }
   void SetEnvironment(HEnvironment* environment) { environment_ = environment; }
 
   LocationSummary* GetLocations() const { return locations_; }
@@ -388,6 +396,9 @@
   // has not beed added to the graph.
   int id_;
 
+  // When doing liveness analysis, instructions that have uses get an SSA index.
+  int ssa_index_;
+
   // List of instructions that have this instruction as input.
   HUseListNode<HInstruction>* uses_;
 
@@ -496,6 +507,25 @@
   HInstruction* next_;
 };
 
+class HBackwardInstructionIterator : public ValueObject {
+ public:
+  explicit HBackwardInstructionIterator(const HInstructionList& instructions)
+      : instruction_(instructions.last_instruction_) {
+    next_ = Done() ? nullptr : instruction_->GetPrevious();
+  }
+
+  bool Done() const { return instruction_ == nullptr; }
+  HInstruction* Current() const { return instruction_; }
+  void Advance() {
+    instruction_ = next_;
+    next_ = Done() ? nullptr : instruction_->GetPrevious();
+  }
+
+ private:
+  HInstruction* instruction_;
+  HInstruction* next_;
+};
+
 // An embedded container with N elements of type T.  Used (with partial
 // specialization for N=0) because embedded arrays cannot have size 0.
 template<typename T, intptr_t N>
@@ -966,6 +996,52 @@
   DISALLOW_COPY_AND_ASSIGN(HGraphVisitor);
 };
 
+class HInsertionOrderIterator : public ValueObject {
+ public:
+  explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+
+  bool Done() const { return index_ == graph_.GetBlocks().Size(); }
+  HBasicBlock* Current() const { return graph_.GetBlocks().Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator);
+};
+
+class HPostOrderIterator : public ValueObject {
+ public:
+  explicit HPostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+
+  bool Done() const { return index_ == graph_.GetPostOrder().Size(); }
+  HBasicBlock* Current() const { return graph_.GetPostOrder().Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
+};
+
+class HReversePostOrderIterator : public ValueObject {
+ public:
+  explicit HReversePostOrderIterator(const HGraph& graph)
+      : graph_(graph), index_(graph_.GetPostOrder().Size()) {}
+
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return graph_.GetPostOrder().Get(index_ - 1); }
+  void Advance() { --index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
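
All of the new block iterators share the Done()/Current()/Advance() protocol, and HReversePostOrderIterator simply walks post_order_ from the back; that reversal is what lets the SSA builder below assume every non-back-edge predecessor of a block has been visited before the block itself. A self-contained sketch of the protocol over a stand-in block list (a vector of ints replaces GrowableArray<HBasicBlock*> purely for illustration):

  #include <cstddef>
  #include <iostream>
  #include <vector>

  using BlockList = std::vector<int>;  // stand-in for the post-order block list

  // Mirrors the Done()/Current()/Advance() shape of the ART iterators:
  // walking the post-order list from the back yields reverse post order.
  class ReversePostOrderIterator {
   public:
    explicit ReversePostOrderIterator(const BlockList& post_order)
        : post_order_(post_order), index_(post_order.size()) {}

    bool Done() const { return index_ == 0; }
    int Current() const { return post_order_[index_ - 1]; }
    void Advance() { --index_; }

   private:
    const BlockList& post_order_;
    size_t index_;
  };

  int main() {
    // Post order of a small diamond CFG: exit, else, then, header, entry.
    BlockList post_order = {4, 3, 2, 1, 0};
    for (ReversePostOrderIterator it(post_order); !it.Done(); it.Advance()) {
      std::cout << "visit block " << it.Current() << std::endl;
    }
    return 0;
  }
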
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 9438890..8594c69 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,6 +22,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "nodes.h"
+#include "ssa_liveness_analysis.h"
 #include "utils/arena_allocator.h"
 
 namespace art {
@@ -50,8 +51,7 @@
 };
 
 
-CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver,
-                                               const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
                                                uint32_t access_flags,
                                                InvokeType invoke_type,
                                                uint16_t class_def_idx,
@@ -60,7 +60,8 @@
                                                const DexFile& dex_file) const {
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags,
+    GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx));
 
   // For testing purposes, we put a special marker on method names that should be compiled
   // with this compiler. This makes sure we're not regressing.
@@ -77,7 +78,7 @@
     return nullptr;
   }
 
-  InstructionSet instruction_set = driver.GetInstructionSet();
+  InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
   // The optimizing compiler currently does not have a Thumb2 assembler.
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
@@ -103,8 +104,9 @@
   // Run these phases to get some test coverage.
   graph->BuildDominatorTree();
   graph->TransformToSSA();
+  SsaLivenessAnalysis(*graph).Analyze();
 
-  return new CompiledMethod(driver,
+  return new CompiledMethod(GetCompilerDriver(),
                             instruction_set,
                             allocator.GetMemory(),
                             codegen->GetFrameSize(),
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index bfb4f38..ee1e1e4 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -20,11 +20,11 @@
 namespace art {
 
 void SsaBuilder::BuildSsa() {
-  // 1) Visit in dominator order. We need to have all predecessors of a block visited
+  // 1) Visit in reverse post order. We need to have all predecessors of a block visited
   // (with the exception of loops) in order to create the right environment for that
   // block. For loops, we create phis whose inputs will be set in 2).
-  for (size_t i = 0; i < GetGraph()->GetDominatorOrder()->Size(); i++) {
-    VisitBasicBlock(GetGraph()->GetDominatorOrder()->Get(i));
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
   }
 
   // 2) Set inputs of loop phis.
@@ -59,7 +59,7 @@
 
   if (block->IsLoopHeader()) {
     // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in dominator order. We create phis for all initialized
+    // because we are visiting in reverse post order. We create phis for all initialized
     // locals from the pre header. Their inputs will be populated at the end of
     // the analysis.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
@@ -76,7 +76,7 @@
     // blocks need to be updated.
     loop_headers_.Add(block);
   } else if (block->GetPredecessors()->Size() > 0) {
-    // All predecessors have already been visited because we are visiting in dominator order.
+    // All predecessors have already been visited because we are visiting in reverse post order.
     // We merge the values of all locals, creating phis if those values differ.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
       bool is_different = false;
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index b6c6c0b..9d8c072 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -29,8 +29,8 @@
       : HGraphVisitor(graph),
         current_locals_(nullptr),
         loop_headers_(graph->GetArena(), kDefaultNumberOfLoops),
-        locals_for_(graph->GetArena(), graph->GetBlocks()->Size()) {
-    locals_for_.SetSize(graph->GetBlocks()->Size());
+        locals_for_(graph->GetArena(), graph->GetBlocks().Size()) {
+    locals_for_.SetSize(graph->GetBlocks().Size());
   }
 
   void BuildSsa();
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
new file mode 100644
index 0000000..838597d
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ssa_liveness_analysis.h"
+#include "nodes.h"
+
+namespace art {
+
+void SsaLivenessAnalysis::Analyze() {
+  NumberInstructions();
+  ComputeSets();
+}
+
+void SsaLivenessAnalysis::NumberInstructions() {
+  int ssa_index = 0;
+  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasUses()) {
+        current->SetSsaIndex(ssa_index++);
+      }
+    }
+
+    for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasUses()) {
+        current->SetSsaIndex(ssa_index++);
+      }
+    }
+  }
+  number_of_ssa_values_ = ssa_index;
+}
+
+void SsaLivenessAnalysis::ComputeSets() {
+  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    block_infos_.Put(
+        block->GetBlockId(),
+        new (graph_.GetArena()) BlockInfo(graph_.GetArena(), *block, number_of_ssa_values_));
+  }
+
+  // Compute the initial live_in, live_out, and kill sets. This method does not handle
+  // backward branches, so the live_in and live_out sets are not yet correct.
+  ComputeInitialSets();
+
+  // Do a fixed point calculation to take into account backward branches,
+  // which will update live_in of loop headers, and therefore live_out and live_in
+  // of blocks in the loop.
+  ComputeLiveInAndLiveOutSets();
+}
+
+void SsaLivenessAnalysis::ComputeInitialSets() {
+  // Do a post order visit, adding inputs of instructions live in the block where
+  // that instruction is defined, and killing instructions that are being visited.
+  for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    BitVector* kill = GetKillSet(*block);
+    BitVector* live_in = GetLiveInSet(*block);
+
+    for (HBackwardInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasSsaIndex()) {
+        kill->SetBit(current->GetSsaIndex());
+        live_in->ClearBit(current->GetSsaIndex());
+      }
+
+      // All inputs of an instruction must be live.
+      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+        DCHECK(current->InputAt(i)->HasSsaIndex());
+        live_in->SetBit(current->InputAt(i)->GetSsaIndex());
+      }
+
+      if (current->HasEnvironment()) {
+        // All instructions in the environment must be live.
+        GrowableArray<HInstruction*>* environment = current->GetEnvironment()->GetVRegs();
+        for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+          HInstruction* instruction = environment->Get(i);
+          if (instruction != nullptr) {
+            DCHECK(instruction->HasSsaIndex());
+            live_in->SetBit(instruction->GetSsaIndex());
+          }
+        }
+      }
+    }
+
+    for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasSsaIndex()) {
+        kill->SetBit(current->GetSsaIndex());
+        live_in->ClearBit(current->GetSsaIndex());
+      }
+
+      // Mark a phi input live_in for its corresponding predecessor.
+      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+        HInstruction* input = current->InputAt(i);
+
+        HBasicBlock* predecessor = block->GetPredecessors()->Get(i);
+        size_t ssa_index = input->GetSsaIndex();
+        BitVector* predecessor_kill = GetKillSet(*predecessor);
+        BitVector* predecessor_live_in = GetLiveInSet(*predecessor);
+
+        // Phi inputs from a back edge have already been visited. If the back edge
+        // block defines that input, we should not add it to its live_in.
+        if (!predecessor_kill->IsBitSet(ssa_index)) {
+          predecessor_live_in->SetBit(ssa_index);
+        }
+      }
+    }
+  }
+}
+
+void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
+  bool changed;
+  do {
+    changed = false;
+
+    for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+      const HBasicBlock& block = *it.Current();
+
+      // The live_in set depends on the kill set (which does not
+      // change in this loop), and the live_out set.  If the live_out
+      // set does not change, there is no need to update the live_in set.
+      if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+        changed = true;
+      }
+    }
+  } while (changed);
+}
+
+bool SsaLivenessAnalysis::UpdateLiveOut(const HBasicBlock& block) {
+  BitVector* live_out = GetLiveOutSet(block);
+  bool changed = false;
+  // The live_out set of a block is the union of live_in sets of its successors.
+  for (size_t i = 0, e = block.GetSuccessors()->Size(); i < e; ++i) {
+    HBasicBlock* successor = block.GetSuccessors()->Get(i);
+    if (live_out->Union(GetLiveInSet(*successor))) {
+      changed = true;
+    }
+  }
+  return changed;
+}
+
+
+bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
+  BitVector* live_out = GetLiveOutSet(block);
+  BitVector* kill = GetKillSet(block);
+  BitVector* live_in = GetLiveInSet(block);
+  // If live_out is updated (because of backward branches), we need to make
+  // sure instructions in live_out are also in live_in, unless they are killed
+  // by this block.
+  return live_in->UnionIfNotIn(live_out, kill);
+}
+
+}  // namespace art
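The fixed point iterated by ComputeLiveInAndLiveOutSets above solves the standard backward liveness equations. Writing kill(B) for the set stored in BlockInfo::kill_ and use(B) for the contribution seeded by ComputeInitialSets (the upward-exposed uses: instruction inputs and environment values read in B before B defines them), the sets converge to:

    \mathrm{live\_out}(B) \;=\; \bigcup_{S \in \mathrm{succ}(B)} \mathrm{live\_in}(S)
    \mathrm{live\_in}(B)  \;=\; \mathrm{use}(B) \,\cup\, \bigl(\mathrm{live\_out}(B) \setminus \mathrm{kill}(B)\bigr)

Because the bit vectors only ever grow, UpdateLiveOut and UpdateLiveIn can apply the unions incrementally instead of recomputing the right-hand sides from scratch.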
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
new file mode 100644
index 0000000..6a901d1
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class BlockInfo : public ArenaObject {
+ public:
+  BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
+      : block_(block),
+        live_in_(allocator, number_of_ssa_values, false),
+        live_out_(allocator, number_of_ssa_values, false),
+        kill_(allocator, number_of_ssa_values, false) {
+    live_in_.ClearAllBits();
+    live_out_.ClearAllBits();
+    kill_.ClearAllBits();
+  }
+
+ private:
+  const HBasicBlock& block_;
+  ArenaBitVector live_in_;
+  ArenaBitVector live_out_;
+  ArenaBitVector kill_;
+
+  friend class SsaLivenessAnalysis;
+
+  DISALLOW_COPY_AND_ASSIGN(BlockInfo);
+};
+
+class SsaLivenessAnalysis : public ValueObject {
+ public:
+  explicit SsaLivenessAnalysis(const HGraph& graph)
+      : graph_(graph),
+        block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
+        number_of_ssa_values_(0) {
+    block_infos_.SetSize(graph.GetBlocks().Size());
+  }
+
+  void Analyze();
+
+  BitVector* GetLiveInSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->live_in_;
+  }
+
+  BitVector* GetLiveOutSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->live_out_;
+  }
+
+  BitVector* GetKillSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->kill_;
+  }
+
+ private:
+  // Give an SSA number to each instruction that defines a value used by another instruction.
+  void NumberInstructions();
+
+  // Compute live_in, live_out and kill sets.
+  void ComputeSets();
+
+  // Compute the initial live_in, live_out and kill sets, without analyzing
+  // backward branches.
+  void ComputeInitialSets();
+
+  // After computing the initial sets, this method does a fixed point
+  // calculation over the live_in and live_out sets to take into account
+  // backwards branches.
+  void ComputeLiveInAndLiveOutSets();
+
+  // Update the live_in set of the block and return whether it has changed.
+  bool UpdateLiveIn(const HBasicBlock& block);
+
+  // Update the live_out set of the block and return whether it has changed.
+  bool UpdateLiveOut(const HBasicBlock& block);
+
+  const HGraph& graph_;
+  GrowableArray<BlockInfo*> block_infos_;
+  size_t number_of_ssa_values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
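For experimenting with the algorithm outside of ART, here is a minimal standalone sketch of the same fixed point, with std::bitset standing in for ArenaBitVector and a hard-coded three-block CFG containing one loop. ToyBlock and ComputeLiveness are illustrative names only; they are not part of the ART sources.

    // Standalone sketch of the liveness fixed point; not ART code.
    #include <bitset>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    constexpr std::size_t kMaxValues = 8;

    struct ToyBlock {
      std::vector<int> successors;   // indices into the block list
      std::bitset<kMaxValues> use;   // values read before being defined here
      std::bitset<kMaxValues> kill;  // values defined here
      std::bitset<kMaxValues> live_in;
      std::bitset<kMaxValues> live_out;
    };

    // Iterate live_out(B) = union of successors' live_in and
    // live_in(B) = use(B) | (live_out(B) & ~kill(B)) until nothing changes.
    void ComputeLiveness(std::vector<ToyBlock>* blocks) {
      bool changed;
      do {
        changed = false;
        // A post order walk converges faster; reverse index order is enough here.
        for (std::size_t i = blocks->size(); i-- > 0;) {
          ToyBlock& block = (*blocks)[i];
          std::bitset<kMaxValues> live_out;
          for (int s : block.successors) {
            live_out |= (*blocks)[s].live_in;
          }
          std::bitset<kMaxValues> live_in = block.use | (live_out & ~block.kill);
          if (live_out != block.live_out || live_in != block.live_in) {
            block.live_out = live_out;
            block.live_in = live_in;
            changed = true;
          }
        }
      } while (changed);
    }

    int main() {
      // Block 0 defines v0, block 1 (a loop) uses v0 and defines v1, block 2 uses v1.
      std::vector<ToyBlock> blocks(3);
      blocks[0].successors = {1};
      blocks[0].kill.set(0);
      blocks[1].successors = {1, 2};
      blocks[1].use.set(0);
      blocks[1].kill.set(1);
      blocks[2].use.set(1);
      ComputeLiveness(&blocks);
      // v0 stays live across the back edge, so it appears in block 1's live_in and live_out.
      std::printf("block 1 live_in:  %s\n", blocks[1].live_in.to_string().c_str());
      std::printf("block 1 live_out: %s\n", blocks[1].live_out.to_string().c_str());
      return 0;
    }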
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 7c3633b..e4aafb7 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -64,8 +64,8 @@
 
 static void ReNumberInstructions(HGraph* graph) {
   int id = 0;
-  for (size_t i = 0; i < graph->GetBlocks()->Size(); i++) {
-    HBasicBlock* block = graph->GetBlocks()->Get(i);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    HBasicBlock* block = graph->GetBlocks().Get(i);
     for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
       it.Current()->SetId(id++);
     }
@@ -147,7 +147,7 @@
 
 TEST(SsaTest, CFG3) {
   // Test that we create a phi for the join block of an if control flow instruction
-  // when there both branches update a local.
+  // when both branches update a local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 18a5bce..032eabc 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "mem_map.h"
+#include "utils.h"
 
 namespace art {
 
@@ -155,7 +156,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = (bytes + 3) & ~3;
+    bytes = RoundUp(bytes, 4);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
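The RoundUp changes in this hunk and in the two scoped_arena_allocator hunks below are behavior-preserving: for a power-of-two n, RoundUp(x, n) is (x + n - 1) & ~(n - 1), which for n == 4 is exactly the old (bytes + 3) & ~3. A quick standalone check (RoundUp is re-declared locally here for illustration; ART's version lives in utils.h):

    // Checks RoundUp(x, 4) == (x + 3) & ~3 for a range of sizes; not ART code.
    #include <cassert>
    #include <cstddef>

    constexpr std::size_t RoundUp(std::size_t x, std::size_t n) {
      // n must be a power of two; this mirrors the bit trick it replaces.
      return (x + n - 1) & ~(n - 1);
    }

    int main() {
      for (std::size_t x = 0; x < 4096; ++x) {
        assert(RoundUp(x, 4) == ((x + 3) & ~static_cast<std::size_t>(3)));
      }
      return 0;
    }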
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index 659b4f7..e703d8e 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -78,7 +78,7 @@
 
       private:
         size_t idx_;
-        GrowableArray* const g_list_;
+        GrowableArray* g_list_;
     };
 
     GrowableArray(ArenaAllocator* arena, size_t init_length, OatListKind kind = kGrowableArrayMisc)
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index bd78eae..b8b0e6e 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + kValgrindRedZoneBytes + 3) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index 28e86ec..d5b003c 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = (bytes + 3) & ~3;
+    size_t rounded_bytes = RoundUp(bytes, 4);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cdf26f1..7c0befc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1010,7 +1010,7 @@
   }
 
   if (compiler_filter_string == NULL) {
-    if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) {
+    if (instruction_set == kX86_64 || instruction_set == kMips) {
       // TODO: implement/fix compilers for these architectures.
       compiler_filter_string = "interpret-only";
     } else if (image) {
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index eddaa0b..f81e2f9 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -50,7 +50,7 @@
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << *out_sp;
+  VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
   }
@@ -74,7 +74,7 @@
 
   // Need to work out the size of the instruction that caused the exception.
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+  VLOG(signals) << "pc: " << std::hex << static_cast<void*>(ptr);
   uint32_t instr_size = GetInstructionSize(ptr);
 
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
@@ -95,7 +95,7 @@
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  LOG(DEBUG) << "Generating null pointer exception";
+  VLOG(signals) << "Generating null pointer exception";
   return true;
 }
 
@@ -117,10 +117,10 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
-  LOG(DEBUG) << "checking suspend";
+  VLOG(signals) << "checking suspend";
 
   uint16_t inst2 = ptr2[0] | ptr2[1] << 8;
-  LOG(DEBUG) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
   if (inst2 != checkinst2) {
     // Second instruction is not good, not ours.
     return false;
@@ -132,7 +132,7 @@
   bool found = false;
   while (ptr1 > limit) {
     uint32_t inst1 = ((ptr1[0] | ptr1[1] << 8) << 16) | (ptr1[2] | ptr1[3] << 8);
-    LOG(DEBUG) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
     if (inst1 == checkinst1) {
       found = true;
       break;
@@ -140,7 +140,7 @@
     ptr1 -= 2;      // Min instruction size is 2 bytes.
   }
   if (found) {
-    LOG(DEBUG) << "suspend check match";
+    VLOG(signals) << "suspend check match";
     // This is a suspend check.  Arrange for the signal handler to return to
     // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
     // will resume the instruction (current PC + 2).  PC points to the
@@ -148,14 +148,14 @@
 
     // NB: remember that we need to set the bottom bit of the LR register
     // to switch to thumb mode.
-    LOG(DEBUG) << "arm lr: " << std::hex << sc->arm_lr;
-    LOG(DEBUG) << "arm pc: " << std::hex << sc->arm_pc;
+    VLOG(signals) << "arm lr: " << std::hex << sc->arm_lr;
+    VLOG(signals) << "arm pc: " << std::hex << sc->arm_pc;
     sc->arm_lr = sc->arm_pc + 3;      // +2 + 1 (for thumb)
     sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
-    LOG(DEBUG) << "removed suspend trigger invoking test suspend";
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
     return true;
   }
   return false;
@@ -174,103 +174,60 @@
 // on the stack.
 //
 // If we determine this is a stack overflow we need to move the stack pointer
-// to the overflow region below the protected region.  Because we now have
-// a gap in the stack (skips over protected region), we need to arrange
-// for the rest of the system to be unaware of the new stack arrangement
-// and behave as if there is a fully valid stack.  We do this by placing
-// a unique address onto the stack followed by
-// the size of the gap.  The stack walker will detect this and skip over the
-// gap.
-
-// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
-// stack must be 8 byte aligned when making any calls.
-
-// NB. The size of the gap is the difference between the previous frame's SP and
-// the SP at which the size word is pushed.
+// to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
-  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+  uintptr_t sp = sc->arm_sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
 
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
-  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
 
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
-    LOG(DEBUG) << "Not a stack overflow";
+    VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  R9 contains the current Thread* so
-  // we can read the stack_end from that and subtract the size of the
-  // protected region.  This creates a gap in the stack that needs to be marked.
-  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  uint8_t* prevsp = sp;
-  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
-  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
-
-  // We need to find the previous frame.  Remember that
-  // this has not yet been fully constructed because the SP has not been
-  // decremented.  So we need to work out the size of the spill portion of the
-  // frame.  This consists of something like:
-  //
-  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
-  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
-  //
-  // The first is encoded in the ArtMethod as the spill_mask, the second as the
-  // fp_spill_mask.  A population count on each will give the number of registers
-  // in each mask.  Each register is 4 bytes on ARM32.
-
-  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
-  uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = POPCOUNT(spill_mask);
-  uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = POPCOUNT(fp_spill_mask);
-  uint32_t spill_size = (numcores + numfps) * 4;
-  LOG(DEBUG) << "spill size: " << spill_size;
-  uint8_t* prevframe = prevsp + spill_size;
-  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
-
-  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
-  // is 4 bytes so that, together with the offset to the previous frame is 8
-  // bytes.  On other architectures we will need to align the stack.
-
-  // Push a marker onto the stack to tell the stack walker that there is a stack
-  // overflow and the stack is not contiguous.
-
-  // First the offset from SP to the previous frame.
-  sp -= sizeof(uint32_t);
-  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
-  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
-
-  // Now the gap marker (pointer sized).
-  sp -= sizeof(mirror::ArtMethod*);
-  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
 
   // Now establish the stack pointer for the signal return.
-  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+  sc->arm_sp = prevsp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  // We need the LR to point to the GC map just after the fault instruction.
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->arm_ip = sp;      // aka r12
 
-  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
-  // stack pointer to be in the overflow region.  Throwing the exception will perform
-  // a longjmp which will restore the stack pointer to the correct location for the
-  // exception catch.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->arm_pc.
   return true;
 }
 }       // namespace art
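The rewritten StackOverflowHandler::Action above reduces to two address computations: the fault is treated as an implicit stack overflow only when the faulting address equals SP minus the reserved region, and the replacement SP passed to the assembly stub in r12 sits just below the protected region, at the thread's stack end minus the protected size. A small standalone sketch of that arithmetic (the function names and the constants are illustrative, not ART's):

    // Address arithmetic mirroring the ARM stack overflow handler; not ART code.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // The implicit check probes at sp - reserved_bytes, so only a fault at
    // exactly that address is treated as a stack overflow.
    bool IsImplicitStackOverflow(std::uintptr_t fault_addr, std::uintptr_t sp,
                                 std::size_t reserved_bytes) {
      return fault_addr == sp - reserved_bytes;
    }

    // The handler parks the stub's SP just below the protected region.
    std::uintptr_t OverflowRegionSp(std::uintptr_t stack_end, std::size_t protected_size) {
      return stack_end - protected_size;
    }

    int main() {
      const std::uintptr_t sp = 0x40000000;
      const std::size_t reserved = 16 * 1024;       // illustrative, not ART's constant
      const std::size_t protected_size = 4 * 1024;  // illustrative, not ART's constant
      const std::uintptr_t stack_end = sp - 8 * 1024;
      if (IsImplicitStackOverflow(sp - reserved, sp, reserved)) {
        std::printf("new sp: %#llx\n",
                    static_cast<unsigned long long>(OverflowRegionSp(stack_end, protected_size)));
      }
      return 0;
    }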
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bc80644..dcf4561 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,6 +235,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Invoke stack overflow exception from signal handler.
+   * On entry:
+   * r9: thread
+   * sp: address of last known frame
+   * r12: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in r12.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r0, r9                      @ pass Thread::Current
+    mov r1, sp                      @ pass SP
+    mov sp, r12                     @ move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7b66613..8079460 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -419,14 +419,30 @@
     brk 0  // Unreached
 .endm
 
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
-    ldr x9, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
-    cbnz x9, 1f
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
+    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+    cbnz \reg, 1f
     ret
 1:
     DELIVER_PENDING_EXCEPTION
 .endm
 
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9
+.endm
+
+// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
+.endm
+
+.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
+    cbnz w0, 1f                // result non-zero branch over
+    ret                        // return
+1:
+    DELIVER_PENDING_EXCEPTION
+.endm
+
 // FIXME: Temporary fix for TR(XSELF).
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
@@ -1153,19 +1169,6 @@
 UNIMPLEMENTED art_quick_initialize_static_storage
 UNIMPLEMENTED art_quick_initialize_type
 UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-UNIMPLEMENTED art_quick_get32_static
-UNIMPLEMENTED art_quick_get64_static
-UNIMPLEMENTED art_quick_get_obj_static
-UNIMPLEMENTED art_quick_get32_instance
-UNIMPLEMENTED art_quick_get64_instance
-UNIMPLEMENTED art_quick_get_obj_instance
-UNIMPLEMENTED art_quick_set32_static
-UNIMPLEMENTED art_quick_set64_static
-UNIMPLEMENTED art_quick_set_obj_static
-UNIMPLEMENTED art_quick_set32_instance
-UNIMPLEMENTED art_quick_set64_instance
-UNIMPLEMENTED art_quick_set_obj_instance
-UNIMPLEMENTED art_quick_resolve_string
 
 // Macro to facilitate adding new allocation entrypoints.
 // TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
@@ -1197,6 +1200,82 @@
 END \name
 .endm
 
+// Macros taking opportunity of code similarities for downcalls with referrer.
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, xSELF                  // pass Thread::Current
+    mov    x3, sp                     // pass SP
+    bl     \entrypoint                // (uint32_t field_idx, Method* referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x4, xSELF                  // pass Thread::Current
+    mov    x5, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+// This is separated out as the argument order is different.
+    .extern artSet64StaticFromCode
+ENTRY art_quick_set64_static
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x3, x1                     // Store value
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, x3                     // Put value param
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     artSet64StaticFromCode
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_set64_static
+
+
+UNIMPLEMENTED art_quick_resolve_string
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 94a7598..4438f25 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "common_runtime_test.h"
+#include "mirror/art_field-inl.h"
 #include "mirror/string-inl.h"
 
 #include <cstdio>
@@ -73,17 +74,28 @@
     __asm__ __volatile__(
         "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
         ".cfi_adjust_cfa_offset 52\n\t"
-        "sub sp, sp, #8\n\t"        // +8B, so 16B aligned with nullptr
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "mov r0, %[arg0]\n\t"       // Set arg0-arg2
-        "mov r1, %[arg1]\n\t"       // TODO: Any way to use constraints like on x86?
-        "mov r2, %[arg2]\n\t"
-        // Use r9 last as we don't know whether it was used for arg0-arg2
-        "mov r9, #0\n\t"            // Push nullptr to terminate stack
         "push {r9}\n\t"
         ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, %[self]\n\t"       // Set the thread
-        "blx %[code]\n\t"           // Call the stub
+        "mov r9, #0\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push nullptr to terminate stack, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
         "add sp, sp, #12\n\t"       // Pop nullptr and padding
         ".cfi_adjust_cfa_offset -12\n\t"
         "pop {r1-r12, lr}\n\t"      // Restore state
@@ -91,30 +103,42 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1, [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x18, [sp, #16]\n\t"   // Save x2, x18(xSELF)
-        "str x30, [sp, #32]\n\t"       // Save xLR
-        "mov x0, %[arg0]\n\t"          // Set arg0-arg2
-        "mov x1, %[arg1]\n\t"          // TODO: Any way to use constraints like on x86?
-        "mov x2, %[arg2]\n\t"
-        // Use r18 last as we don't know whether it was used for arg0-arg2
-        "mov x18, %[self]\n\t"         // Set the thread
-        "blr %[code]\n\t"              // Call the stub
+        "stp xzr, x1,  [sp]\n\t"        // nullptr(end of quick stack), x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
         "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x18, x30, [sp, #24]\n\t"  // Restore xSELF, xLR
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
         "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
+
         "mov %[result], x0\n\t"        // Save the result
         : [result] "=r" (result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -139,6 +163,151 @@
     self->PopManagedStackFragment(fragment);
     return result;
   }
+
+ public:
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
+                             mirror::ArtMethod* referrer) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+#if defined(__i386__)
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushl %[referrer]\n\t"     // Store referrer
+        "call *%%edi\n\t"           // Call the stub
+        "addl $4, %%esp"            // Pop referrer
+        : "=a" (result)
+          // Use the result from eax
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
+    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
+    //       but compilation fails when declaring that.
+#elif defined(__arm__)
+    __asm__ __volatile__(
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "push {r9}\n\t"
+        ".cfi_adjust_cfa_offset 4\n\t"
+        "mov r9, %[referrer]\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
+        "mov %[result], r0\n\t"     // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : );  // clobber.
+#elif defined(__aarch64__)
+    __asm__ __volatile__(
+        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+        "mov %[result], x0\n\t"        // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+#elif defined(__x86_64__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"             // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+          // Use the result from rax
+          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+    return result;
+  }
+
+  // Method with 32b arg0, 64b arg1
+  size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
+                              mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
+#else
+    // Need to split up arguments.
+    uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF);
+    uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF);
+
+    return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer);
+#endif
+  }
+
+  // Method with 32b arg0, 32b arg1, 64b arg2
+  size_t Invoke3UUWithReferrer(uint32_t arg0, uint32_t arg1, uint64_t arg2, uintptr_t code,
+                               Thread* self, mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, referrer);
+#else
+    // TODO: Needs 4-param invoke.
+    return 0;
+#endif
+  }
 };
 
 
@@ -231,6 +400,7 @@
 #endif
 }
 
+
 class RandGen {
  public:
   explicit RandGen(uint32_t seed) : val_(seed) {}
@@ -723,11 +893,11 @@
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-/*
- * For some reason this does not work, as the type_idx is artificial and outside what the
- * resolved types of c_obj allow...
- *
-  {
+
+  // For some reason this does not work, as the type_idx is artificial and outside what the
+  // resolved types of c_obj allow...
+
+  if (false) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
@@ -742,7 +912,7 @@
     VerifyObject(obj);
     EXPECT_EQ(obj->GetLength(), 10);
   }
-*/
+
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
@@ -750,7 +920,7 @@
                             reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
                             self);
 
-    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
     EXPECT_TRUE(obj->IsArrayInstance());
@@ -881,4 +1051,383 @@
 #endif
 }
 
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_static(void);
+extern "C" void art_quick_get32_static(void);
+#endif
+
+static void GetSet32Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              static_cast<size_t>(values[i]),
+                              0U,
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_static),
+                              self,
+                              referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get32_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set32static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_instance(void);
+extern "C" void art_quick_get32_instance(void);
+#endif
+
+static void GetSet32Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_instance),
+                              self,
+                              referrer);
+
+    int32_t res = f->get()->GetInt(obj->get());
+    EXPECT_EQ(res, static_cast<int32_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetInt<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get32_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int32_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_static(void);
+extern "C" void art_quick_get_obj_static(void);
+
+static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
+                                 mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                            reinterpret_cast<size_t>(val),
+                            0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_static),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                                         0U, 0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_static),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+}
+#endif
+
+static void GetSetObjStatic(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                            mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_static((*f)->GetDexFieldIndex(), str, self, referrer, test);
+
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_instance(void);
+extern "C" void art_quick_get_obj_instance(void);
+
+static void set_and_check_instance(SirtRef<mirror::ArtField>* f, mirror::Object* trg,
+                                   mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
+                                   StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                            reinterpret_cast<size_t>(trg),
+                            reinterpret_cast<size_t>(val),
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_instance),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                         reinterpret_cast<size_t>(trg),
+                                         0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_instance),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+
+  EXPECT_EQ(val, f->get()->GetObj(trg));
+}
+#endif
+
+static void GetSetObjInstance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_instance(f, obj->get(), str, self, referrer, test);
+
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+// TODO: Complete these tests for 32b architectures.
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_static(void);
+extern "C" void art_quick_get64_static(void);
+#endif
+
+static void GetSet64Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                               values[i],
+                               reinterpret_cast<uintptr_t>(&art_quick_set64_static),
+                               self,
+                               referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get64_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set64static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_instance(void);
+extern "C" void art_quick_get64_instance(void);
+#endif
+
+static void GetSet64Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set64_instance),
+                              self,
+                              referrer);
+
+    int64_t res = f->get()->GetLong(obj->get());
+    EXPECT_EQ(res, static_cast<int64_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetLong<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get64_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int64_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+static void TestFields(Thread* self, StubTest* test, Primitive::Type test_type) {
+  // garbage is created during ClassLinker::Init
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  jclass jc = env->FindClass("AllFields");
+  CHECK(jc != NULL);
+  jobject o = env->AllocObject(jc);
+  CHECK(o != NULL);
+
+  ScopedObjectAccess soa(self);
+  SirtRef<mirror::Object> obj(self, soa.Decode<mirror::Object*>(o));
+
+  SirtRef<mirror::Class> c(self, obj->GetClass());
+
+  // Need a method as a referrer
+  SirtRef<mirror::ArtMethod> m(self, c->GetDirectMethod(0));
+
+  // Play with it...
+
+  // Static fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetSFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjStatic(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // Instance fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetIFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjInstance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+}
+
+
+TEST_F(StubTest, Fields32) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimInt);
+}
+
+TEST_F(StubTest, FieldsObj) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimNot);
+}
+
+TEST_F(StubTest, Fields64) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimLong);
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 7b56718..a55dbb6 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -871,82 +871,63 @@
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
 
-DEFINE_FUNCTION art_quick_set32_instance
+
+MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
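+    // The referrer sits just above the return address; read it before
+    // SETUP_REF_ONLY_CALLEE_SAVE_FRAME moves %rsp.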
+    movq 8(%rsp), %rsi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
+    movq %rsp, %rcx                    // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rdx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
     movq 8(%rsp), %rcx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
+                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
     movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_instance
+    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
 
-DEFINE_FUNCTION art_quick_set64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set64_instance
 
-DEFINE_FUNCTION art_quick_set_obj_instance
-    movq 8(%rsp), %rcx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
-    movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSetObjInstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set_obj_instance
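+// Setters return 0 on success (RETURN_IF_EAX_ZERO); getters return their value unless an
+// exception is pending (RETURN_OR_DELIVER_PENDING_EXCEPTION).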
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get32_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet32InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get32_instance
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_get64_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get64_instance
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get_obj_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGetObjInstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get_obj_instance
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_set32_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet32StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_static
-
+// This one is singled out because its argument order differs from the macros above.
 DEFINE_FUNCTION art_quick_set64_static
     movq %rsi, %rdx                    // pass new_val
     movq 8(%rsp), %rsi                 // pass referrer
@@ -959,49 +940,6 @@
     RETURN_IF_EAX_ZERO                 // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
-DEFINE_FUNCTION art_quick_set_obj_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSetObjStaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_set_obj_static
-
-DEFINE_FUNCTION art_quick_get32_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet32StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get32_static
-
-DEFINE_FUNCTION art_quick_get64_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet64StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get64_static
-
-DEFINE_FUNCTION art_quick_get_obj_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGetObjStaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get_obj_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
     // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 3df5101..0e01dc2 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -43,7 +43,8 @@
   : allocator_(allocator),
     expandable_(expandable),
     storage_size_(storage_size),
-    storage_(storage) {
+    storage_(storage),
+    number_of_bits_(start_bits) {
   DCHECK_EQ(sizeof(*storage_), 4U);  // Assuming 32-bit units.
   if (storage_ == nullptr) {
     storage_size_ = BitsToWords(start_bits);
@@ -93,6 +94,7 @@
     // TOTO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
+    number_of_bits_ = num;
   }
 
   storage_[num >> 5] |= check_masks[num & 0x1f];
@@ -113,23 +115,24 @@
 
   // If the highest bit set is different, we are different.
   if (our_highest != src_highest) {
-    return true;
+    return false;
   }
 
   // If the highest bit set is -1, both are cleared, we are the same.
   // If the highest bit set is 0, both have a unique bit set, we are the same.
-  if (our_highest >= 0) {
+  if (our_highest <= 0) {
     return true;
   }
 
-  // Get the highest bit set's cell's index.
-  int our_highest_index = (our_highest >> 5);
+  // Get the index of the cell holding the highest set bit.
+  // No need for highest + 1 here because highest can't be 0, so BitsToWords works as-is.
+  int our_highest_index = BitsToWords(our_highest);
 
   // This memcmp is enough: we know that the highest bit set is the same for both:
   //   - Therefore, min_size goes up to at least that, we are thus comparing at least what we need to, but not less.
   //      ie. we are comparing all storage cells that could have difference, if both vectors have cells above our_highest_index,
   //          they are automatically at 0.
-  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) != 0);
+  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) == 0);
 }
 
 // Intersect with another bit vector.
@@ -156,13 +159,14 @@
 /*
  * Union with another bit vector.
  */
-void BitVector::Union(const BitVector* src) {
+bool BitVector::Union(const BitVector* src) {
   // Get the highest bit to determine how much we need to expand.
   int highest_bit = src->GetHighestBitSet();
+  bool changed = false;
 
   // If src has no bit set, we are done: there is no need for a union with src.
   if (highest_bit == -1) {
-    return;
+    return changed;
   }
 
   // Update src_size to how many cells we actually care about: where the bit is + 1.
@@ -170,6 +174,8 @@
 
   // Is the storage size smaller than src's?
   if (storage_size_ < src_size) {
+    changed = true;
+
     // Set it to reallocate.
     SetBit(highest_bit);
 
@@ -178,8 +184,62 @@
   }
 
   for (uint32_t idx = 0; idx < src_size; idx++) {
-    storage_[idx] |= src->GetRawStorageWord(idx);
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing | src->GetRawStorageWord(idx);
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
   }
+  return changed;
+}
+
+bool BitVector::UnionIfNotIn(const BitVector* union_with, const BitVector* not_in) {
+  // Get the highest bit to determine how much we need to expand.
+  int highest_bit = union_with->GetHighestBitSet();
+  bool changed = false;
+
+  // If union_with has no bit set, we are done: there is no need for a union with it.
+  if (highest_bit == -1) {
+    return changed;
+  }
+
+  // Update union_with_size to how many cells we actually care about: where the bit is + 1.
+  uint32_t union_with_size = BitsToWords(highest_bit + 1);
+
+  // Is the storage size smaller than union_with's?
+  if (storage_size_ < union_with_size) {
+    changed = true;
+
+    // Set it to reallocate.
+    SetBit(highest_bit);
+
+    // Paranoid: storage size should be big enough to hold this bit now.
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+  }
+
+  uint32_t not_in_size = not_in->GetStorageSize();
+
+  uint32_t idx = 0;
+  for (; idx < std::min(not_in_size, union_with_size); idx++) {
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing |
+        (union_with->GetRawStorageWord(idx) & ~not_in->GetRawStorageWord(idx));
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
+  }
+
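+  // Words past not_in's storage are implicitly zero, so the remaining
+  // union_with words are OR'd in unchanged.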
+  for (; idx < union_with_size; idx++) {
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing | union_with->GetRawStorageWord(idx);
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
+  }
+  return changed;
 }
 
 void BitVector::Subtract(const BitVector *src) {
@@ -342,7 +402,7 @@
 void BitVector::Dump(std::ostream& os, const char *prefix) {
   std::ostringstream buffer;
   DumpHelper(buffer, prefix);
-  os << buffer << std::endl;
+  os << buffer.str() << std::endl;
 }
 
 void BitVector::DumpDot(FILE* file, const char* prefix, bool last_entry) {
@@ -367,13 +427,11 @@
     buffer << prefix;
   }
 
-  int max = GetHighestBitSet();
-
-  for (int i = 0; i <= max; i++) {
-    if (IsBitSet(i)) {
-      buffer << i << " ";
-    }
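+  // Print every bit as 0/1 between parentheses instead of listing only the set bit indices.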
+  buffer << '(';
+  for (size_t i = 0; i < number_of_bits_; i++) {
+    buffer << IsBitSet(i);
   }
+  buffer << ')';
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index db29c49..6ee6b00 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -103,7 +103,11 @@
 
     void Copy(const BitVector* src);
     void Intersect(const BitVector* src2);
-    void Union(const BitVector* src);
+    bool Union(const BitVector* src);
+
+    // Union this vector with the bits of union_with that are not set in not_in.
+    // Returns true if this vector changed.
+    bool UnionIfNotIn(const BitVector* union_with, const BitVector* not_in);
+
     void Subtract(const BitVector* src);
     // Are we equal to another bit vector?  Note: expandability attributes must also match.
     bool Equal(const BitVector* src) {
@@ -155,6 +159,7 @@
     const bool expandable_;         // expand bitmap if we run out?
     uint32_t   storage_size_;       // current size, in 32-bit words.
     uint32_t*  storage_;
+    uint32_t   number_of_bits_;     // number of bits in the vector (used when dumping).
 };
 
 
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index bd5ae85..c4461fa 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -296,6 +296,7 @@
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
+  bool signals;
 };
 
 extern LogVerbosity gLogVerbosity;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dbea0d8..e3c162b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3309,33 +3309,36 @@
   if (klass->IsInterface()) {
     return true;
   }
-  Thread* self = Thread::Current();
-  // begin with the methods local to the superclass
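+  // Each method that overrides a superclass or interface method must resolve the classes in
+  // its signature to the same classes as the overridden method, even across class loaders.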
+  // Begin with the methods local to the superclass.
+  MethodHelper mh;
+  MethodHelper super_mh;
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
-    SirtRef<mirror::Class> super(self, klass->GetSuperClass());
-    for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
-      if (method != super->GetVTable()->Get(i) &&
-          !IsSameMethodSignatureInDifferentClassContexts(self, method, super.get(), klass.get())) {
+    for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
+      mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
+      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
+      bool is_override = mh.GetMethod() != super_mh.GetMethod();
+      if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
         ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
-                          PrettyDescriptor(klass.get()).c_str(), PrettyMethod(method).c_str(),
-                          PrettyDescriptor(super.get()).c_str());
+                          PrettyDescriptor(klass.get()).c_str(),
+                          PrettyMethod(mh.GetMethod()).c_str(),
+                          PrettyDescriptor(klass->GetSuperClass()).c_str());
         return false;
       }
     }
   }
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
-    if (klass->GetClassLoader() != interface->GetClassLoader()) {
-      for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
-        if (!IsSameMethodSignatureInDifferentClassContexts(self, method, interface.get(),
-                                                           method->GetDeclaringClass())) {
+    if (klass->GetClassLoader() != klass->GetIfTable()->GetInterface(i)->GetClassLoader()) {
+      uint32_t num_methods = klass->GetIfTable()->GetInterface(i)->NumVirtualMethods();
+      for (uint32_t j = 0; j < num_methods; ++j) {
+        mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
+        super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
+        bool is_override = mh.GetMethod() != super_mh.GetMethod();
+        if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
           ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
-                            PrettyDescriptor(method->GetDeclaringClass()).c_str(),
-                            PrettyMethod(method).c_str(),
-                            PrettyDescriptor(interface.get()).c_str());
+                            PrettyDescriptor(klass.get()).c_str(),
+                            PrettyMethod(mh.GetMethod()).c_str(),
+                            PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
           return false;
         }
       }
@@ -3344,60 +3347,6 @@
   return true;
 }
 
-// Returns true if classes referenced by the signature of the method are the
-// same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(Thread* self,
-                                                                mirror::ArtMethod* method,
-                                                                mirror::Class* klass1,
-                                                                mirror::Class* klass2) {
-  if (klass1 == klass2) {
-    return true;
-  }
-  CHECK(klass1 != nullptr);
-  CHECK(klass2 != nullptr);
-  SirtRef<mirror::ClassLoader> loader1(self, klass1->GetClassLoader());
-  SirtRef<mirror::ClassLoader> loader2(self, klass2->GetClassLoader());
-  const DexFile& dex_file = *method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-  const DexFile::ProtoId& proto_id =
-      dex_file.GetMethodPrototype(dex_file.GetMethodId(method->GetDexMethodIndex()));
-  for (DexFileParameterIterator it(dex_file, proto_id); it.HasNext(); it.Next()) {
-    const char* descriptor = it.GetDescriptor();
-    if (descriptor == nullptr) {
-      break;
-    }
-    if (descriptor[0] == 'L' || descriptor[0] == '[') {
-      // Found a non-primitive type.
-      if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-        return false;
-      }
-    }
-  }
-  // Check the return type
-  const char* descriptor = dex_file.GetReturnTypeDescriptor(proto_id);
-  if (descriptor[0] == 'L' || descriptor[0] == '[') {
-    if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Returns true if the descriptor resolves to the same class in the context of loader1 and loader2.
-bool ClassLinker::IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                           SirtRef<mirror::ClassLoader>& loader1,
-                                                           SirtRef<mirror::ClassLoader>& loader2) {
-  CHECK(descriptor != nullptr);
-  SirtRef<mirror::Class> found1(self, FindClass(self, descriptor, loader1));
-  if (found1.get() == nullptr) {
-    self->ClearException();
-  }
-  mirror::Class* found2 = FindClass(self, descriptor, loader2);
-  if (found2 == nullptr) {
-    self->ClearException();
-  }
-  return found1.get() == found2;
-}
-
 bool ClassLinker::EnsureInitialized(const SirtRef<mirror::Class>& c, bool can_init_fields,
                                     bool can_init_parents) {
   DCHECK(c.get() != NULL);
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index b8093bc..283faa2 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -35,6 +35,13 @@
 // Static fault manger object accessed by signal handler.
 FaultManager fault_manager;
 
+extern "C" {
+void art_sigsegv_fault() {
+  // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
+  LOG(ERROR) << "Caught unknown SIGSEGV in ART fault handler";
+}
+}
+
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
   fault_manager.HandleFault(sig, info, context);
@@ -75,7 +82,10 @@
       return;
     }
   }
-  LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
+
+  // Allow the user to catch this problem with a simple breakpoint in art_sigsegv_fault.
+  art_sigsegv_fault();
+
   oldaction_.sa_sigaction(sig, info, context);
 }
 
@@ -106,23 +116,23 @@
 bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
-  LOG(DEBUG) << "Checking for generated code";
+  VLOG(signals) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
-    LOG(DEBUG) << "no current thread";
+    VLOG(signals) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
-    LOG(DEBUG) << "not runnable";
+    VLOG(signals) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
-    LOG(DEBUG) << "no lock";
+    VLOG(signals) << "no lock";
     return false;
   }
 
@@ -135,9 +145,9 @@
   GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
-  LOG(DEBUG) << "potential method: " << method_obj;
+  VLOG(signals) << "potential method: " << method_obj;
   if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
-    LOG(DEBUG) << "no method";
+    VLOG(signals) << "no method";
     return false;
   }
 
@@ -147,36 +157,36 @@
   // TODO: Method might be not a heap address, and GetClass could fault.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
-    LOG(DEBUG) << "not a class";
+    VLOG(signals) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
-    LOG(DEBUG) << "not aligned";
+    VLOG(signals) << "not aligned";
     return false;
   }
 
 
   if (!VerifyClassClass(cls)) {
-    LOG(DEBUG) << "not a class class";
+    VLOG(signals) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
-    LOG(DEBUG) << "not a method";
+    VLOG(signals) << "not a method";
     return false;
   }
 
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
-    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
     const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
     uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
-    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
   uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
-  LOG(DEBUG) << "dexpc: " << dexpc;
+  VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index f5d6299..a0659e7 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -64,8 +64,8 @@
 
 static constexpr bool kProtectFromSpace = true;
 static constexpr bool kStoreStackTraces = false;
-static constexpr bool kUseBytesPromoted = true;
 static constexpr size_t kBytesPromotedThreshold = 4 * MB;
+static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
@@ -104,8 +104,8 @@
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
       bytes_promoted_since_last_whole_heap_collection_(0),
+      large_object_bytes_allocated_at_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0),
       collector_name_(name_),
       swap_semi_spaces_(true) {
 }
@@ -187,12 +187,8 @@
     if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
         clear_soft_references_) {
       // If an explicit, native allocation-triggered, or last attempt
-      // collection, collect the whole heap (and reset the interval
-      // counter to be consistent.)
+      // collection, collect the whole heap.
       whole_heap_collection_ = true;
-      if (!kUseBytesPromoted) {
-        whole_heap_collection_interval_counter_ = 0;
-      }
     }
     if (whole_heap_collection_) {
       VLOG(heap) << "Whole heap collection";
@@ -798,32 +794,27 @@
     // only space collection at the next collection by updating
     // whole_heap_collection.
     if (!whole_heap_collection_) {
-      if (!kUseBytesPromoted) {
-        // Enable whole_heap_collection once every
-        // kDefaultWholeHeapCollectionInterval collections.
-        --whole_heap_collection_interval_counter_;
-        DCHECK_GE(whole_heap_collection_interval_counter_, 0);
-        if (whole_heap_collection_interval_counter_ == 0) {
-          whole_heap_collection_ = true;
-        }
-      } else {
-        // Enable whole_heap_collection if the bytes promoted since
-        // the last whole heap collection exceeds a threshold.
-        bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
-        if (bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold) {
-          whole_heap_collection_ = true;
-        }
+      // Enable whole_heap_collection if the bytes promoted since the
+      // last whole heap collection or the large object bytes
+      // allocated exceeds a threshold.
+      bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
+      bool bytes_promoted_threshold_exceeded =
+          bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold;
+      uint64_t current_los_bytes_allocated = GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      uint64_t last_los_bytes_allocated =
+          large_object_bytes_allocated_at_last_whole_heap_collection_;
+      bool large_object_bytes_threshold_exceeded =
+          current_los_bytes_allocated >=
+          last_los_bytes_allocated + kLargeObjectBytesAllocatedThreshold;
+      if (bytes_promoted_threshold_exceeded || large_object_bytes_threshold_exceeded) {
+        whole_heap_collection_ = true;
       }
     } else {
-      if (!kUseBytesPromoted) {
-        DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
-        whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
-        whole_heap_collection_ = false;
-      } else {
-        // Reset it.
-        bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
-        whole_heap_collection_ = false;
-      }
+      // Reset the counters.
+      bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
+      large_object_bytes_allocated_at_last_whole_heap_collection_ =
+          GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      whole_heap_collection_ = false;
     }
   }
   // Clear all of the spaces' mark bitmaps.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3b3e1b1..9fdf471 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -234,14 +234,14 @@
   // the non-moving space, since the last whole heap collection.
   uint64_t bytes_promoted_since_last_whole_heap_collection_;
 
+  // Used for the generational mode. Records the large object space's
+  // bytes-allocated total at the time of the last whole heap collection.
+  uint64_t large_object_bytes_allocated_at_last_whole_heap_collection_;
+
   // Used for the generational mode. When true, collect the whole
   // heap. When false, collect only the bump pointer spaces.
   bool whole_heap_collection_;
 
-  // Used for the generational mode. A counter used to enable
-  // whole_heap_collection_ once per interval.
-  int whole_heap_collection_interval_counter_;
-
   // How many objects and bytes we moved, used so that we don't need to get the size of the
   // to_space_ when calculating how many objects and bytes we freed.
   size_t bytes_moved_;
diff --git a/runtime/globals.h b/runtime/globals.h
index eb52a46..07fadb9 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -55,9 +55,8 @@
 // but ARM ELF requires 8..
 static constexpr size_t kArmAlignment = 8;
 
-// ARM64 instruction alignment. AArch64 require code to be 4-byte aligned.
-// AArch64 ELF requires at least 4.
-static constexpr size_t kArm64Alignment = 4;
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
 
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7232e54..a87f95c 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -28,7 +28,17 @@
                                 Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string name(PrettyMethod(method));
-  if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
+  if (name == "java.lang.Object dalvik.system.VMRuntime.newUnpaddedArray(java.lang.Class, int)") {
+    int32_t length = args[1];
+    DCHECK_GE(length, 0);
+    mirror::Class* element_class = reinterpret_cast<Object*>(args[0])->AsClass();
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, element_class);
+    DCHECK(array_class != nullptr);
+    gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+    result->SetL(mirror::Array::Alloc<true>(self, array_class, length,
+                                            array_class->GetComponentSize(), allocator, true));
+  } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
     result->SetL(NULL);
   } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") {
     NthCallerVisitor visitor(self, 3);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 74b7c42..e425e91 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -519,7 +519,7 @@
       // Don't allow finalizable objects to be allocated during a transaction since these can't be
       // finalized without a started runtime.
       if (transaction_active && obj->GetClass()->IsFinalizable()) {
-        AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+        AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                          PrettyTypeOf(obj).c_str());
         HANDLE_PENDING_EXCEPTION();
       }
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0da1445..9c13973 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -432,7 +432,7 @@
           // Don't allow finalizable objects to be allocated during a transaction since these can't
           // be finalized without a started runtime.
           if (transaction_active && obj->GetClass()->IsFinalizable()) {
-            AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+            AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                              PrettyTypeOf(obj).c_str());
             HANDLE_PENDING_EXCEPTION();
             break;
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 072f074..504537a 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -520,8 +520,7 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
-  bool HasSameNameAndSignature(MethodHelper* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
     if (GetDexCache() == other->GetDexCache()) {
@@ -539,6 +538,33 @@
     return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
   }
 
+  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
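+    // Compare resolved classes rather than descriptors: the same descriptor may resolve to
+    // different classes under different class loaders, which is exactly the case to detect.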
+    if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
+      return false;
+    }
+    const DexFile::TypeList* types = GetParameterTypeList();
+    const DexFile::TypeList* other_types = other->GetParameterTypeList();
+    if (types == nullptr) {
+      return (other_types == nullptr) || (other_types->Size() == 0);
+    } else if (UNLIKELY(other_types == nullptr)) {
+      return types->Size() == 0;
+    }
+    uint32_t num_types = types->Size();
+    if (UNLIKELY(num_types != other_types->Size())) {
+      return false;
+    }
+    for (uint32_t i = 0; i < num_types; ++i) {
+      mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+      mirror::Class* other_param_type =
+          other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
+      if (UNLIKELY(param_type != other_param_type)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   const DexFile::CodeItem* GetCodeItem()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9cf8785..1562527 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -232,6 +232,7 @@
 //  gLogVerbosity.startup = true;  // TODO: don't check this in!
 //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.threads = true;  // TODO: don't check this in!
+//  gLogVerbosity.signals = true;  // TODO: don't check this in!
 
   method_trace_ = false;
   method_trace_file_ = "/data/method-trace-file.bin";
@@ -253,7 +254,7 @@
 #ifdef HAVE_ANDROID_OS
   {
     char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.implicit_checks", buf, "none");
+    property_get("dalvik.vm.implicit_checks", buf, "null,stack");
     std::string checks(buf);
     std::vector<std::string> checkvec;
     Split(checks, ',', checkvec);
@@ -464,6 +465,8 @@
           gLogVerbosity.third_party_jni = true;
         } else if (verbose_options[i] == "threads") {
           gLogVerbosity.threads = true;
+        } else if (verbose_options[i] == "signals") {
+          gLogVerbosity.signals = true;
         } else {
           Usage("Unknown -verbose option %s\n", verbose_options[i].c_str());
           return false;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 027feee..cbd51d4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -535,9 +535,20 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
-  if (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
+  bool implicit_checks_supported = false;
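+  // Implicit checks are only wired up for ARM (kArm / kThumb2) so far.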
+  switch (kRuntimeISA) {
+  case kArm:
+  case kThumb2:
+    implicit_checks_supported = true;
+    break;
+  default:
+    break;
+  }
+
+  if (implicit_checks_supported &&
+    (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
         ParsedOptions::kExplicitNullCheck |
-        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler) {
+        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
     fault_manager.Init();
 
     // These need to be in a specific order.  The null point check handler must be
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9c709ae..5e64e59 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -32,14 +32,6 @@
 
 namespace art {
 
-// Define a piece of memory, the address of which can be used as a marker
-// for the gap in the stack added during stack overflow handling.
-static uint32_t stack_overflow_object;
-
-// The stack overflow gap marker is simply a valid unique address.
-void* stack_overflow_gap_marker = &stack_overflow_object;
-
-
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -305,56 +297,23 @@
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
 
-  bool kDebugStackWalk = false;
-  bool kDebugStackWalkVeryVerbose = false;            // The name says it all.
-
-  if (kDebugStackWalk) {
-    LOG(INFO) << "walking stack";
-  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
-    if (kDebugStackWalkVeryVerbose) {
-      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
-      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
-    }
 
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        // Check for a stack overflow gap marker.
-        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
-          // Marker for a stack overflow.  This is followed by the offset from the
-          // current SP to the next frame.  There is a gap in the stack here.  Jump
-          // the gap silently.
-          // Caveat coder: the layout of the overflow marker depends on the architecture.
-          //   The first element is address sized (8 bytes on a 64 bit machine).  The second
-          //   element is 32 bits.  So be careful with those address calculations.
-
-          // Get the address of the offset, just beyond the marker pointer.
-          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
-          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
-          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
-          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
-            reinterpret_cast<byte*>(gapsizeaddr) + gap);
-          if (kDebugStackWalk) {
-            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
-                next_frame;
-          }
-          cur_quick_frame_ = next_frame;
-          method = *next_frame;
-          CHECK(method != nullptr);
-        } else {
-          SanityCheckFrame();
-          bool should_continue = VisitFrame();
-          if (UNLIKELY(!should_continue)) {
-            return;
-          }
+        SanityCheckFrame();
+        bool should_continue = VisitFrame();
+        if (UNLIKELY(!should_continue)) {
+          return;
         }
+
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
         }
@@ -363,9 +322,6 @@
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
-        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -398,10 +354,6 @@
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
-          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
-        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
diff --git a/runtime/stack.h b/runtime/stack.h
index 73a823a..88ef78f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -102,14 +102,6 @@
   kVRegNonSpecialTempBaseReg = -3,
 };
 
-// Special object used to mark the gap in the stack placed when a stack
-// overflow fault occurs during implicit stack checking.  This is not
-// a real object - it is used simply as a valid address to which a
-// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
-// into the stack during the stack overflow signal handling to mark the gap
-// in which the memory is protected against read and write.
-extern void* stack_overflow_gap_marker;
-
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 23a6779..3a62cd5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -243,10 +243,16 @@
   pregion -= kStackOverflowProtectedSize;
 
   // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.
+  // need to do this on the main stack.  We only need to touch one byte per page.
   if (is_main_stack) {
-    memset(pregion, 0x55, kStackOverflowProtectedSize);
+    byte* start = pregion;
+    byte* end = pregion + kStackOverflowProtectedSize;
+    while (start < end) {
+      *start = static_cast<byte>(0);
+      start += kPageSize;
+    }
   }
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
@@ -255,6 +261,11 @@
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
+
+  // Tell the kernel that we won't be needing these pages any more.
+  if (is_main_stack) {
+    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
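+    // The PROT_NONE protection stays in place; this just lets the kernel reclaim the
+    // physical pages that were touched above.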
+  }
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ee2cca4..c332bdf 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1169,10 +1169,12 @@
 
 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
   CHECK(subdir != nullptr);
-  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const char* android_data = GetAndroidData();
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
+    if (strcmp(android_data, "/data") != 0) {
       int result = mkdir(dalvik_cache_root.c_str(), 0700);
       if (result != 0 && errno != EEXIST) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 03ceed3..41ff96e 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3125,10 +3125,19 @@
     VLOG(verifier) << "Failed to get mirror::Class* from '" << actual_arg_type << "'";
     return nullptr;
   }
-  mirror::ObjectArray<mirror::ArtMethod>* vtable = actual_arg_type.GetClass()->GetVTable();
-  CHECK(vtable != nullptr);
+  mirror::ObjectArray<mirror::ArtMethod>* vtable = nullptr;
+  mirror::Class* klass = actual_arg_type.GetClass();
+  if (klass->IsInterface()) {
+    // Derive Object.class from Class.class.getSuperclass().
+    mirror::Class* object_klass = klass->GetClass()->GetSuperClass();
+    CHECK(object_klass->IsObjectClass());
+    vtable = object_klass->GetVTable();
+  } else {
+    vtable = klass->GetVTable();
+  }
+  CHECK(vtable != nullptr) << PrettyDescriptor(klass);
   uint16_t vtable_index = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength());
+  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength()) << PrettyDescriptor(klass);
   mirror::ArtMethod* res_method = vtable->Get(vtable_index);
   CHECK(!Thread::Current()->IsExceptionPending());
   return res_method;
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index e0d2f1d..6cf7998 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -150,7 +150,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"