Merge "Use 8-byte increment bracket sizes for rosalloc thread local runs."
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 7a93613..5887620 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -283,11 +283,13 @@
     static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
   };
   union {
-    uint32_t cmp2_;             // Used for relational operators.
+    // Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`.
+    // This allows a hashing function to treat an array of linker patches as raw memory.
+    size_t cmp2_;             // Used for relational operators.
     // Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
     // may be different if the PC-relative addressing needs multiple insns).
     uint32_t pc_insn_offset_;
-    static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators");
+    static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators");
   };
 
   friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
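
Note on the hunk above: the widened `cmp2_` matters because the class is compared, and per the new comment hashed, as raw bytes, and any pointer member forces 8-byte alignment on LP64, so ending the object with a uint32_t-sized union would leave trailing uninitialized padding. A minimal layout sketch of that idea; PatchSketch and its fields are illustrative stand-ins, not the real LinkerPatch:

#include <cstddef>
#include <cstdint>

struct PatchSketch {
  const void* target_dex_file_;  // A pointer member forces 8-byte alignment on LP64.
  uint32_t literal_offset_;
  union {
    uint32_t cmp1_;              // Used for relational operators.
    uint32_t target_offset_;
  };
  union {
    size_t cmp2_;                // Widest member; zero-initializing it covers the whole union.
    uint32_t pc_insn_offset_;
  };
  PatchSketch() : target_dex_file_(nullptr), literal_offset_(0u), cmp1_(0u), cmp2_(0u) {}
};

static_assert(sizeof(uint32_t) <= sizeof(size_t), "needed by relational operators");
// With size_t cmp2_ the LP64 layout is 8 + 4 + 4 + 8 = 24 bytes with no padding;
// with uint32_t cmp2_ it would be 20 bytes of data padded out to 24.
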
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3a9ce1b..97c60de 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -64,7 +64,8 @@
 
   virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
                           jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
-                          ArtMethod* method ATTRIBUTE_UNUSED)
+                          ArtMethod* method ATTRIBUTE_UNUSED,
+                          bool osr ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     return false;
   }
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 372fe2b..4d6c058 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+static constexpr size_t kMips64DoublewordSize = 8;
+
 /* This file contains codegen for the Mips ISA */
 LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
@@ -760,7 +762,25 @@
 
   if (cu_->target64) {
     if (short_form) {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Ld) {
+        RegStorage r_tmp = AllocTemp();
+        load = res = NewLIR3(kMips64Lwu, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMips64Lwu, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsll32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        NewLIR3(kMipsOr, r_dest.GetReg(), r_dest.GetReg(), r_tmp.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFldc1) {
+        RegStorage r_tmp = AllocTemp();
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
@@ -771,7 +791,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+      AnnotateDalvikRegAccess(load, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, r_dest.Is64Bit() /* is64bit */);
+      if (load2 != nullptr) {
+        AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
+                                true /* is_load */, r_dest.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }
@@ -932,7 +957,24 @@
 
   if (cu_->target64) {
     if (short_form) {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Sd) {
+        RegStorage r_tmp = AllocTemp();
+        res = NewLIR2(kMipsMove, r_tmp.GetReg(), r_src.GetReg());
+        store = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsrl32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFsdc1) {
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET,
+                              r_base.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_scratch = AllocTemp();
       res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
@@ -942,7 +984,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+      AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      if (store2 != nullptr) {
+        AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }
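
Note on the hunks above: when the displacement is not doubleword-aligned, the new code splits the 64-bit GPR load into two word loads and recombines them (kMips64Lwu, kMips64Dsll32, kMipsOr); stores are handled symmetrically with kMipsSw and kMips64Dsrl32. A sketch of the value the load sequence produces, assuming the little-endian layout implied by LOWORD_OFFSET/HIWORD_OFFSET being 0 and 4 (the helper name is hypothetical):

#include <cstdint>
#include <cstring>

// What the lwu/dsll32/or sequence leaves in r_dest.
uint64_t LoadDoublewordSplit(const uint8_t* base, int32_t displacement) {
  uint32_t lo;  // Load at displacement + LOWORD_OFFSET; kMips64Lwu zero-extends.
  uint32_t hi;  // Load at displacement + HIWORD_OFFSET into the temp register.
  std::memcpy(&lo, base + displacement + 0, sizeof(lo));
  std::memcpy(&hi, base + displacement + 4, sizeof(hi));
  // kMips64Dsll32 moves the high word to bits 63..32; kMipsOr merges it with the low word.
  return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
}

int main() {
  uint8_t buffer[12] = {};
  buffer[4] = 0x78; buffer[5] = 0x56; buffer[6] = 0x34; buffer[7] = 0x12;
  buffer[8] = 0xF0; buffer[9] = 0xDE; buffer[10] = 0xBC; buffer[11] = 0x9A;
  // Displacement 4 is word-aligned but not doubleword-aligned.
  return LoadDoublewordSplit(buffer, 4) == 0x9ABCDEF012345678ULL ? 0 : 1;  // 0 on little-endian hosts.
}
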
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f1b7458..f078bf6 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -921,7 +921,7 @@
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
-  virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  virtual bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
@@ -975,7 +975,7 @@
   explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
       : image_classes_(image_classes) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string temp;
     image_classes_->insert(klass->GetDescriptor(&temp));
     return true;
@@ -1142,7 +1142,7 @@
    public:
     explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
 
-    bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
       std::string temp;
       const char* name = klass->GetDescriptor(&temp);
       if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
@@ -2306,9 +2306,9 @@
           mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self());
           // Mark methods as pre-verified. If we don't do this, the interpreter will run with
           // access checks.
-          klass->SetPreverifiedFlagOnAllMethods(
+          klass->SetSkipAccessChecksFlagOnAllMethods(
               GetInstructionSetPointerSize(manager_->GetCompiler()->GetInstructionSet()));
-          klass->SetPreverified();
+          klass->SetVerificationAttempted();
         }
         // Record the final class status if necessary.
         ClassReference ref(manager_->GetDexFile(), class_def_index);
diff --git a/compiler/dwarf/debug_abbrev_writer.h b/compiler/dwarf/debug_abbrev_writer.h
new file mode 100644
index 0000000..71367e8
--- /dev/null
+++ b/compiler/dwarf/debug_abbrev_writer.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_
+
+#include <cstdint>
+#include <type_traits>
+#include <unordered_map>
+
+#include "base/casts.h"
+#include "base/stl_util.h"
+#include "dwarf/dwarf_constants.h"
+#include "dwarf/writer.h"
+#include "leb128.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for the .debug_abbrev.
+//
+// Abbreviations specify the format of entries in .debug_info.
+// Each entry specifies an abbreviation code, which in turn
+// determines all the attributes and their format.
+// It is possible to think of them as type definitions.
+template <typename Vector = std::vector<uint8_t>>
+class DebugAbbrevWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  explicit DebugAbbrevWriter(Vector* buffer)
+      : Writer<Vector>(buffer),
+        current_abbrev_(buffer->get_allocator()) {
+    this->PushUint8(0);  // Add abbrev table terminator.
+  }
+
+  // Start abbreviation declaration.
+  void StartAbbrev(Tag tag) {
+    DCHECK(current_abbrev_.empty());
+    EncodeUnsignedLeb128(&current_abbrev_, tag);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
+  }
+
+  // Add attribute specification.
+  void AddAbbrevAttribute(Attribute name, Form type) {
+    EncodeUnsignedLeb128(&current_abbrev_, name);
+    EncodeUnsignedLeb128(&current_abbrev_, type);
+  }
+
+  // End abbreviation declaration and return its code.
+  // This will deduplicate abbreviations.
+  uint32_t EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;
+    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), NextAbbrevCode()));
+    uint32_t abbrev_code = it.first->second;
+    if (UNLIKELY(it.second)) {  // Inserted new entry.
+      const Vector& abbrev = it.first->first;
+      this->Pop();  // Remove abbrev table terminator.
+      this->PushUleb128(abbrev_code);
+      this->PushData(abbrev.data(), abbrev.size());
+      this->PushUint8(0);  // Attribute list end.
+      this->PushUint8(0);  // Attribute list end.
+      this->PushUint8(0);  // Add abbrev table terminator.
+    }
+    current_abbrev_.clear();
+    return abbrev_code;
+  }
+
+  // Get the next free abbrev code.
+  uint32_t NextAbbrevCode() {
+    return dchecked_integral_cast<uint32_t>(1 + abbrev_codes_.size());
+  }
+
+ private:
+  Vector current_abbrev_;
+  size_t has_children_offset_ = 0;
+  std::unordered_map<Vector, uint32_t, FNVHash<Vector> > abbrev_codes_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_
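
Note on the new file above: EndAbbrev deduplicates by inserting the serialized abbreviation into a map keyed on its bytes and assigning the next free code only when the insertion actually takes place; repeated abbreviations get back the code they received the first time. A standalone sketch of that pattern (Intern and the std::string key are illustrative, not part of the real writer):

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

uint32_t Intern(std::unordered_map<std::string, uint32_t>* codes, const std::vector<uint8_t>& abbrev) {
  std::string key(abbrev.begin(), abbrev.end());
  // The proposed code (1 + current size) is kept only if the key was not seen before.
  auto it = codes->insert(std::make_pair(std::move(key), static_cast<uint32_t>(1 + codes->size())));
  if (it.second) {
    // New abbreviation: this is where the real writer appends it to .debug_abbrev.
  }
  return it.first->second;  // Existing or freshly assigned code.
}

int main() {
  std::unordered_map<std::string, uint32_t> codes;
  uint32_t a = Intern(&codes, {1, 2});
  uint32_t b = Intern(&codes, {3});
  uint32_t c = Intern(&codes, {1, 2});
  std::printf("%u %u %u\n", a, b, c);  // Prints "1 2 1": the repeated abbreviation reuses its code.
}
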
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index e5bbed3..1e29859 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -21,6 +21,7 @@
 #include <unordered_map>
 
 #include "base/casts.h"
+#include "dwarf/debug_abbrev_writer.h"
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/expression.h"
 #include "dwarf/writer.h"
@@ -29,24 +30,8 @@
 namespace art {
 namespace dwarf {
 
-// 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
-// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-template <typename Vector>
-struct FNVHash {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  size_t operator()(const Vector& v) const {
-    uint32_t hash = 2166136261u;
-    for (size_t i = 0; i < v.size(); i++) {
-      hash = (hash ^ v[i]) * 16777619u;
-    }
-    return hash;
-  }
-};
-
 /*
  * Writer for debug information entries (DIE).
- * It also handles generation of abbreviations.
  *
  * Usage:
  *   StartTag(DW_TAG_compile_unit);
@@ -69,13 +54,13 @@
     if (inside_entry_) {
       // Write abbrev code for the previous entry.
       // Parent entry is finalized before any children are written.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_yes));
       inside_entry_ = false;
     }
-    StartAbbrev(tag);
+    debug_abbrev_->StartAbbrev(tag);
     // Abbrev code placeholder of sufficient size.
     abbrev_code_offset_ = this->data()->size();
-    this->PushUleb128(NextAbbrevCode());
+    this->PushUleb128(debug_abbrev_->NextAbbrevCode());
     depth_++;
     inside_entry_ = true;
     return abbrev_code_offset_ + kCompilationUnitHeaderSize;
@@ -86,7 +71,7 @@
     DCHECK_GT(depth_, 0);
     if (inside_entry_) {
       // Write abbrev code for this entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_no));
       inside_entry_ = false;
       // This entry has no children and so there is no terminator.
     } else {
@@ -98,7 +83,7 @@
   }
 
   void WriteAddr(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_addr);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_addr);
     patch_locations_.push_back(this->data()->size());
     if (is64bit_) {
       this->PushUint64(value);
@@ -108,85 +93,89 @@
   }
 
   void WriteBlock(Attribute attrib, const uint8_t* ptr, size_t num_bytes) {
-    AddAbbrevAttribute(attrib, DW_FORM_block);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_block);
     this->PushUleb128(num_bytes);
     this->PushData(ptr, num_bytes);
   }
 
   void WriteExprLoc(Attribute attrib, const Expression& expr) {
-    AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_exprloc);
     this->PushUleb128(dchecked_integral_cast<uint32_t>(expr.size()));
     this->PushData(expr.data());
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data1);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data1);
     this->PushUint8(value);
   }
 
   void WriteData2(Attribute attrib, uint16_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data2);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data2);
     this->PushUint16(value);
   }
 
   void WriteData4(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data4);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data4);
     this->PushUint32(value);
   }
 
   void WriteData8(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data8);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data8);
     this->PushUint64(value);
   }
 
   void WriteSecOffset(Attribute attrib, uint32_t offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
     this->PushUint32(offset);
   }
 
   void WriteSdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_sdata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sdata);
     this->PushSleb128(value);
   }
 
   void WriteUdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
     this->PushUleb128(value);
   }
 
   void WriteUdata(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
     this->PushUleb128(value);
   }
 
   void WriteFlag(Attribute attrib, bool value) {
-    AddAbbrevAttribute(attrib, DW_FORM_flag);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag);
     this->PushUint8(value ? 1 : 0);
   }
 
+  void WriteFlagPresent(Attribute attrib) {
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag_present);
+  }
+
   void WriteRef4(Attribute attrib, uint32_t cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref4);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref4);
     this->PushUint32(cu_offset);
   }
 
   void WriteRef(Attribute attrib, uint32_t cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
     this->PushUleb128(cu_offset);
   }
 
   void WriteString(Attribute attrib, const char* value) {
-    AddAbbrevAttribute(attrib, DW_FORM_string);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_string);
     this->PushString(value);
   }
 
   void WriteStrp(Attribute attrib, size_t debug_str_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
     this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
   }
 
   void WriteStrp(Attribute attrib, const char* str, size_t len,
                  std::vector<uint8_t>* debug_str) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
     this->PushUint32(debug_str->size());
     debug_str->insert(debug_str->end(), str, str + len);
     debug_str->push_back(0);
@@ -209,16 +198,13 @@
   using Writer<Vector>::UpdateUint32;
 
   DebugInfoEntryWriter(bool is64bitArch,
-                       Vector* debug_abbrev,
+                       DebugAbbrevWriter<Vector>* debug_abbrev,
                        const typename Vector::allocator_type& alloc =
                            typename Vector::allocator_type())
       : Writer<Vector>(&entries_),
         debug_abbrev_(debug_abbrev),
-        current_abbrev_(alloc),
-        abbrev_codes_(alloc),
         entries_(alloc),
         is64bit_(is64bitArch) {
-    debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
   }
 
   ~DebugInfoEntryWriter() {
@@ -227,53 +213,7 @@
   }
 
  private:
-  // Start abbreviation declaration.
-  void StartAbbrev(Tag tag) {
-    current_abbrev_.clear();
-    EncodeUnsignedLeb128(&current_abbrev_, tag);
-    has_children_offset_ = current_abbrev_.size();
-    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
-  }
-
-  // Add attribute specification.
-  void AddAbbrevAttribute(Attribute name, Form type) {
-    DCHECK(inside_entry_) << "Call StartTag before adding attributes.";
-    EncodeUnsignedLeb128(&current_abbrev_, name);
-    EncodeUnsignedLeb128(&current_abbrev_, type);
-  }
-
-  int NextAbbrevCode() {
-    return 1 + abbrev_codes_.size();
-  }
-
-  // End abbreviation declaration and return its code.
-  int EndAbbrev(Children has_children) {
-    DCHECK(!current_abbrev_.empty());
-    current_abbrev_[has_children_offset_] = has_children;
-    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
-                                                  NextAbbrevCode()));
-    int abbrev_code = it.first->second;
-    if (UNLIKELY(it.second)) {  // Inserted new entry.
-      const Vector& abbrev = it.first->first;
-      debug_abbrev_.Pop();  // Remove abbrev table terminator.
-      debug_abbrev_.PushUleb128(abbrev_code);
-      debug_abbrev_.PushData(abbrev.data(), abbrev.size());
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
-    }
-    return abbrev_code;
-  }
-
- private:
-  // Fields for writing and deduplication of abbrevs.
-  Writer<Vector> debug_abbrev_;
-  Vector current_abbrev_;
-  size_t has_children_offset_ = 0;
-  std::unordered_map<Vector, int,
-                     FNVHash<Vector> > abbrev_codes_;
-
-  // Fields for writing of debugging information entries.
+  DebugAbbrevWriter<Vector>* debug_abbrev_;
   Vector entries_;
   bool is64bit_;
   int depth_ = 0;
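
Note on the hunks above: StartTag reserves room by pushing NextAbbrevCode() as a ULEB128 placeholder and later patches it via UpdateUleb128 once EndAbbrev has assigned the real code; all of this relies on the 7-bits-per-byte LEB128 scheme from leb128.h. A minimal sketch of the unsigned encoding (the helper name is hypothetical):

#include <cstdint>
#include <cstdio>
#include <vector>

void EncodeUleb128Sketch(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7f;  // Low 7 bits of the remaining value.
    value >>= 7;
    if (value != 0) {
      byte |= 0x80;               // High bit set means more bytes follow.
    }
    out->push_back(byte);
  } while (value != 0);
}

int main() {
  std::vector<uint8_t> buffer;
  EncodeUleb128Sketch(&buffer, 624485);
  for (uint8_t b : buffer) {
    std::printf("%02x ", b);      // Prints "e5 8e 26", the classic DWARF example.
  }
  std::printf("\n");
}
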
diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
deleted file mode 100644
index 7fb21b7..0000000
--- a/compiler/dwarf/dedup_vector.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
-#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
-
-#include <vector>
-#include <unordered_map>
-
-namespace art {
-namespace dwarf {
-  class DedupVector {
-   public:
-    // Returns an offset to previously inserted identical block of data,
-    // or appends the data at the end of the vector and returns offset to it.
-    size_t Insert(const uint8_t* ptr, size_t num_bytes) {
-      // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-      uint32_t hash = 2166136261u;
-      for (size_t i = 0; i < num_bytes; i++) {
-        hash = (hash ^ ptr[i]) * 16777619u;
-      }
-      // Try to find existing copy of the data.
-      const auto& range = hash_to_offset_.equal_range(hash);
-      for (auto it = range.first; it != range.second; ++it) {
-        const size_t offset = it->second;
-        if (offset + num_bytes <= vector_.size() &&
-            memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
-          return offset;
-        }
-      }
-      // Append the data at the end of the vector.
-      const size_t new_offset = vector_.size();
-      hash_to_offset_.emplace(hash, new_offset);
-      vector_.insert(vector_.end(), ptr, ptr + num_bytes);
-      return new_offset;
-    }
-
-    const std::vector<uint8_t>& Data() const { return vector_; }
-
-   private:
-    struct IdentityHash {
-      size_t operator()(uint32_t v) const { return v; }
-    };
-
-    // We store the full hash as the key to simplify growing of the table.
-    // It avoids storing or referencing the actual data in the hash-table.
-    std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
-
-    std::vector<uint8_t> vector_;
-  };
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEDUP_VECTOR_H_
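
Note on the deleted file above: both DedupVector and the FNVHash functor removed from debug_info_entry_writer.h hash with 32-bit FNV-1a, offset basis 2166136261 and prime 16777619; the shared functor used by the new DebugAbbrevWriter presumably comes from base/stl_util.h, judging by its includes. A standalone sketch of the hash:

#include <cstddef>
#include <cstdint>
#include <cstdio>

uint32_t Fnv1a32(const uint8_t* data, size_t size) {
  uint32_t hash = 2166136261u;            // FNV offset basis.
  for (size_t i = 0; i < size; ++i) {
    hash = (hash ^ data[i]) * 16777619u;  // XOR in the byte, then multiply by the FNV prime.
  }
  return hash;
}

int main() {
  const uint8_t bytes[] = {'a', 'b', 'c'};
  std::printf("%08x\n", Fnv1a32(bytes, sizeof(bytes)));  // 1a47e90b for "abc".
}
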
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index e9cd421..3237311 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -283,7 +283,8 @@
 
 TEST_F(DwarfTest, DebugInfo) {
   constexpr bool is64bit = false;
-  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
+  DebugAbbrevWriter<> debug_abbrev(&debug_abbrev_data_);
+  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev);
   DW_CHECK("Contents of the .debug_info section:");
   info.StartTag(dwarf::DW_TAG_compile_unit);
   DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index 35b3e15..aa3070a 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -42,6 +42,8 @@
   static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
   static Reg MipsCore(int num) { return Reg(num); }
   static Reg Mips64Core(int num) { return Reg(num); }
+  static Reg MipsFp(int num) { return Reg(32 + num); }
+  static Reg Mips64Fp(int num) { return Reg(32 + num); }
   static Reg X86Core(int num) { return Reg(num); }
   static Reg X86Fp(int num) { return Reg(21 + num); }
   static Reg X86_64Core(int num) {
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 3d24d19..b673eeb 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -110,18 +110,27 @@
       CHECK(sections.empty() || sections.back()->finished_);
       // The first ELF section index is 1. Index 0 is reserved for NULL.
       section_index_ = sections.size() + 1;
-      // Push this section on the list of written sections.
-      sections.push_back(this);
+      // Page-align if we switch between allocated and non-allocated sections,
+      // or if we change the type of allocation (e.g. executable vs non-executable).
+      if (!sections.empty()) {
+        if (header_.sh_flags != sections.back()->header_.sh_flags) {
+          header_.sh_addralign = kPageSize;
+        }
+      }
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = owner_->AlignFileOffset(header_.sh_addralign);
+      } else {
+        header_.sh_offset = 0;
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
-        header_.sh_addr = RoundUp(owner_->virtual_address_, header_.sh_addralign);
-        owner_->virtual_address_ = header_.sh_addr;
+        header_.sh_addr = owner_->AlignVirtualAddress(header_.sh_addralign);
+      } else {
+        header_.sh_addr = 0;
       }
+      // Push this section on the list of written sections.
+      sections.push_back(this);
     }
 
     // Finish writing of this section.
@@ -170,8 +179,8 @@
     // and it will be zero-initialized when the ELF file is loaded in the running program.
     void WriteNoBitsSection(Elf_Word size) {
       DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
-      Start();
       header_.sh_type = SHT_NOBITS;
+      Start();
       header_.sh_size = size;
       End();
     }
@@ -293,12 +302,13 @@
         dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
         eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
         eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-        strtab_(this, ".strtab", 0, kPageSize),
+        strtab_(this, ".strtab", 0, 1),
         symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
         debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
         debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         shstrtab_(this, ".shstrtab", 0, 1),
+        started_(false),
         virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
@@ -351,22 +361,25 @@
     other_sections_.push_back(std::move(s));
   }
 
-  // Set where the next section will be allocated in the virtual address space.
-  void SetVirtualAddress(Elf_Addr address) {
-    DCHECK_GE(address, virtual_address_);
-    virtual_address_ = address;
-  }
-
-  void Start() {
-    // Reserve space for ELF header and program headers.
-    // We do not know the number of headers until later, so
-    // it is easiest to just reserve a fixed amount of space.
-    int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+  // Reserve space for ELF header and program headers.
+  // We do not know the number of headers until later, so
+  // it is easiest to just reserve a fixed amount of space.
+  // Program headers are required for loading by the linker.
+  // It is possible to omit them for ELF files used for debugging.
+  void Start(bool write_program_headers = true) {
+    int size = sizeof(Elf_Ehdr);
+    if (write_program_headers) {
+      size += sizeof(Elf_Phdr) * kMaxProgramHeaders;
+    }
     stream_.Seek(size, kSeekSet);
+    started_ = true;
     virtual_address_ += size;
+    write_program_headers_ = write_program_headers;
   }
 
   void End() {
+    DCHECK(started_);
+
     // Write section names and finish the section headers.
     shstrtab_.Start();
     shstrtab_.Write("");
@@ -386,8 +399,7 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    stream_.Seek(section_headers_offset, kSeekSet);
+    section_headers_offset = AlignFileOffset(sizeof(Elf_Off));
     stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
 
     // Flush everything else before writing the program headers. This should prevent
@@ -395,14 +407,21 @@
     // and partially written data if we suddenly lose power, for example.
     stream_.Flush();
 
-    // Write the initial file headers.
-    std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
+    // The main ELF header.
     Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = sizeof(Elf_Ehdr);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
+
+    // Program headers (i.e. mmap instructions).
+    std::vector<Elf_Phdr> phdrs;
+    if (write_program_headers_) {
+      phdrs = MakeProgramHeaders();
+      CHECK_LE(phdrs.size(), kMaxProgramHeaders);
+      elf_header.e_phoff = sizeof(Elf_Ehdr);
+      elf_header.e_phnum = phdrs.size();
+    }
+
     stream_.Seek(0, kSeekSet);
     stream_.WriteFully(&elf_header, sizeof(elf_header));
     stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
@@ -492,6 +511,14 @@
     return &stream_;
   }
 
+  off_t AlignFileOffset(size_t alignment) {
+     return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet);
+  }
+
+  Elf_Addr AlignVirtualAddress(size_t alignment) {
+     return virtual_address_ = RoundUp(virtual_address_, alignment);
+  }
+
  private:
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
@@ -666,9 +693,13 @@
   // List of used section in the order in which they were written.
   std::vector<Section*> sections_;
 
+  bool started_;
+
   // Used for allocation of virtual address space.
   Elf_Addr virtual_address_;
 
+  bool write_program_headers_;
+
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
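
Note on the hunks above: the new AlignFileOffset and AlignVirtualAddress helpers both reduce to RoundUp on a power-of-two alignment (a page when the allocation flags change, sizeof(Elf_Off) for the section header table). A minimal sketch of that arithmetic; RoundUpSketch stands in for art's RoundUp:

#include <cassert>
#include <cstdint>

uint64_t RoundUpSketch(uint64_t value, uint64_t alignment) {
  assert(alignment != 0 && (alignment & (alignment - 1)) == 0);  // Power of two.
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  assert(RoundUpSketch(0x1234, 0x1000) == 0x2000);  // Page-align a file offset.
  assert(RoundUpSketch(0x2000, 0x1000) == 0x2000);  // Already aligned: unchanged.
  assert(RoundUpSketch(13, 8) == 16);               // e.g. sizeof(Elf_Off) alignment.
  return 0;
}
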
 
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index c5a0fd5..5dbf736 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -52,6 +52,9 @@
   virtual ~ElfWriter() {}
 
   virtual void Start() = 0;
+  virtual void PrepareDebugInfo(size_t rodata_section_size,
+                                size_t text_section_size,
+                                const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
   virtual OutputStream* StartRoData() = 0;
   virtual void EndRoData(OutputStream* rodata) = 0;
   virtual OutputStream* StartText() = 0;
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2e98b69..e2481b0 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -27,7 +27,6 @@
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
-#include "dwarf/dedup_vector.h"
 #include "dwarf/expression.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
@@ -90,6 +89,10 @@
       return Reg::X86Fp(machine_reg);
     case kX86_64:
       return Reg::X86_64Fp(machine_reg);
+    case kMips:
+      return Reg::MipsFp(machine_reg);
+    case kMips64:
+      return Reg::Mips64Fp(machine_reg);
     default:
       LOG(FATAL) << "Unknown instruction set: " << isa;
       UNREACHABLE();
@@ -162,6 +165,14 @@
           opcodes.SameValue(Reg::MipsCore(reg));
         }
       }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 24) {
+          opcodes.Undefined(Reg::Mips64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::Mips64Fp(reg));
+        }
+      }
       auto return_reg = Reg::MipsCore(31);  // R31(RA).
       WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
@@ -474,7 +485,7 @@
    public:
     explicit CompilationUnitWriter(DebugInfoWriter* owner)
       : owner_(owner),
-        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) {
+        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &owner->debug_abbrev_) {
     }
 
     void Write(const CompilationUnit& compilation_unit) {
@@ -485,9 +496,9 @@
       const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
       info_.StartTag(DW_TAG_compile_unit);
-      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteString(DW_AT_producer, "Android dex2oat");
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
-      info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
+      info_.WriteString(DW_AT_comp_dir, "$JAVA_SRC_ROOT");
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
       info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
       info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
@@ -505,7 +516,7 @@
         // Enclose the method in correct class definition.
         if (last_dex_class_desc != dex_class_desc) {
           if (last_dex_class_desc != nullptr) {
-            EndClassTag(last_dex_class_desc);
+            EndClassTag();
           }
           // Write reference tag for the class we are about to declare.
           size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
@@ -516,7 +527,7 @@
           // Declare the class that owns this method.
           size_t class_offset = StartClassTag(dex_class_desc);
           info_.UpdateUint32(type_attrib_offset, class_offset);
-          info_.WriteFlag(DW_AT_declaration, true);
+          info_.WriteFlagPresent(DW_AT_declaration);
           // Check that each class is defined only once.
           bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
           CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -542,7 +553,7 @@
         if (!is_static) {
           info_.StartTag(DW_TAG_formal_parameter);
           WriteName("this");
-          info_.WriteFlag(DW_AT_artificial, true);
+          info_.WriteFlagPresent(DW_AT_artificial);
           WriteLazyType(dex_class_desc);
           if (dex_code != nullptr) {
             // Write the stack location of the parameter.
@@ -601,25 +612,32 @@
         CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
       }
       if (last_dex_class_desc != nullptr) {
-        EndClassTag(last_dex_class_desc);
+        EndClassTag();
       }
-      CHECK_EQ(info_.Depth(), 1);
       FinishLazyTypes();
+      CloseNamespacesAboveDepth(0);
       info_.EndTag();  // DW_TAG_compile_unit
+      CHECK_EQ(info_.Depth(), 0);
       std::vector<uint8_t> buffer;
       buffer.reserve(info_.data()->size() + KB);
       const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
-      const size_t debug_abbrev_offset =
-          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      // All compilation units share a single table, which is at the start of .debug_abbrev.
+      const size_t debug_abbrev_offset = 0;
       WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
     void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
       info_.StartTag(DW_TAG_compile_unit);
-      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteString(DW_AT_producer, "Android dex2oat");
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
 
+      // Base class references to be patched at the end.
+      std::map<size_t, mirror::Class*> base_class_references;
+
+      // Already written declarations or definitions.
+      std::map<mirror::Class*, size_t> class_declarations;
+
       std::vector<uint8_t> expr_buffer;
       for (mirror::Class* type : types) {
         if (type->IsPrimitive()) {
@@ -633,6 +651,7 @@
           uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
           uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
+          CloseNamespacesAboveDepth(0);  // Declare in root namespace.
           info_.StartTag(DW_TAG_array_type);
           std::string descriptor_string;
           WriteLazyType(element_type->GetDescriptor(&descriptor_string));
@@ -650,22 +669,10 @@
           // Skip.  Variables cannot have an interface as a dynamic type.
           // We do not expose the interface information to the debugger in any way.
         } else {
-          // Declare base class.  We can not use the standard WriteLazyType
-          // since we want to avoid the DW_TAG_reference_tag wrapping.
-          mirror::Class* base_class = type->GetSuperClass();
-          size_t base_class_declaration_offset = 0;
-          if (base_class != nullptr) {
-            std::string tmp_storage;
-            const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
-            base_class_declaration_offset = StartClassTag(base_class_desc);
-            info_.WriteFlag(DW_AT_declaration, true);
-            WriteLinkageName(base_class);
-            EndClassTag(base_class_desc);
-          }
-
           std::string descriptor_string;
           const char* desc = type->GetDescriptor(&descriptor_string);
-          StartClassTag(desc);
+          size_t class_offset = StartClassTag(desc);
+          class_declarations.emplace(type, class_offset);
 
           if (!type->IsVariableSize()) {
             info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
@@ -680,7 +687,7 @@
             info_.StartTag(DW_TAG_member);
             WriteName(".dynamic_type");
             WriteLazyType(sizeof(uintptr_t) == 8 ? "J" : "I");
-            info_.WriteFlag(DW_AT_artificial, true);
+            info_.WriteFlagPresent(DW_AT_artificial);
             // Create DWARF expression to get the value of the methods_ field.
             Expression expr(&expr_buffer);
             // The address of the object has been implicitly pushed on the stack.
@@ -702,9 +709,11 @@
           }
 
           // Base class.
+          mirror::Class* base_class = type->GetSuperClass();
           if (base_class != nullptr) {
             info_.StartTag(DW_TAG_inheritance);
-            info_.WriteRef4(DW_AT_type, base_class_declaration_offset);
+            base_class_references.emplace(info_.size(), base_class);
+            info_.WriteRef4(DW_AT_type, 0);
             info_.WriteUdata(DW_AT_data_member_location, 0);
             info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
             info_.EndTag();  // DW_TAG_inheritance.
@@ -743,18 +752,40 @@
             info_.EndTag();  // DW_TAG_member.
           }
 
-          EndClassTag(desc);
+          EndClassTag();
         }
       }
 
-      CHECK_EQ(info_.Depth(), 1);
+      // Write base class declarations.
+      for (const auto& base_class_reference : base_class_references) {
+        size_t reference_offset = base_class_reference.first;
+        mirror::Class* base_class = base_class_reference.second;
+        const auto& it = class_declarations.find(base_class);
+        if (it != class_declarations.end()) {
+          info_.UpdateUint32(reference_offset, it->second);
+        } else {
+          // Declare base class.  We can not use the standard WriteLazyType
+          // since we want to avoid the DW_TAG_reference_tag wrapping.
+          std::string tmp_storage;
+          const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
+          size_t base_class_declaration_offset = StartClassTag(base_class_desc);
+          info_.WriteFlagPresent(DW_AT_declaration);
+          WriteLinkageName(base_class);
+          EndClassTag();
+          class_declarations.emplace(base_class, base_class_declaration_offset);
+          info_.UpdateUint32(reference_offset, base_class_declaration_offset);
+        }
+      }
+
       FinishLazyTypes();
+      CloseNamespacesAboveDepth(0);
       info_.EndTag();  // DW_TAG_compile_unit.
+      CHECK_EQ(info_.Depth(), 0);
       std::vector<uint8_t> buffer;
       buffer.reserve(info_.data()->size() + KB);
       const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
-      const size_t debug_abbrev_offset =
-          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      // All compilation units share a single table, which is at the start of .debug_abbrev.
+      const size_t debug_abbrev_offset = 0;
       WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
@@ -840,10 +871,6 @@
               expr.WriteOpReg(Reg::ArmDp(value / 2).num());
               break;
             }
-            if (isa == kMips || isa == kMips64) {
-              // TODO: Find what the DWARF floating point register numbers are on MIPS.
-              break;
-            }
             expr.WriteOpReg(GetDwarfFpReg(isa, value).num());
             if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
                 reg_hi.GetValue() == reg_lo.GetValue()) {
@@ -940,7 +967,7 @@
    private:
     void WriteName(const char* name) {
       if (name != nullptr) {
-        info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+        info_.WriteString(DW_AT_name, name);
       }
     }
 
@@ -957,8 +984,8 @@
       if (desc[0] == 'L') {
         // Class type. For example: Lpackage/name;
         size_t class_offset = StartClassTag(desc.c_str());
-        info_.WriteFlag(DW_AT_declaration, true);
-        EndClassTag(desc.c_str());
+        info_.WriteFlagPresent(DW_AT_declaration);
+        EndClassTag();
         // Reference to the class type.
         offset = info_.StartTag(DW_TAG_reference_type);
         info_.WriteRef(DW_AT_type, class_offset);
@@ -966,8 +993,9 @@
       } else if (desc[0] == '[') {
         // Array type.
         size_t element_type = WriteTypeDeclaration(desc.substr(1));
+        CloseNamespacesAboveDepth(0);  // Declare in root namespace.
         size_t array_type = info_.StartTag(DW_TAG_array_type);
-        info_.WriteFlag(DW_AT_declaration, true);
+        info_.WriteFlagPresent(DW_AT_declaration);
         info_.WriteRef(DW_AT_type, element_type);
         info_.EndTag();
         offset = info_.StartTag(DW_TAG_reference_type);
@@ -1028,6 +1056,7 @@
           LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
         }
+        CloseNamespacesAboveDepth(0);  // Declare in root namespace.
         offset = info_.StartTag(DW_TAG_base_type);
         WriteName(name);
         info_.WriteData1(DW_AT_encoding, encoding);
@@ -1042,36 +1071,52 @@
     // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags.
     // Returns offset of the class tag in the compilation unit.
     size_t StartClassTag(const char* desc) {
-      DCHECK(desc != nullptr && desc[0] == 'L');
-      // Enclose the type in namespace tags.
-      const char* end;
-      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
-        info_.StartTag(DW_TAG_namespace);
-        WriteName(std::string(desc, end - desc).c_str());
-      }
-      // Start the class tag.
+      std::string name = SetNamespaceForClass(desc);
       size_t offset = info_.StartTag(DW_TAG_class_type);
-      end = strchr(desc, ';');
-      CHECK(end != nullptr);
-      WriteName(std::string(desc, end - desc).c_str());
+      WriteName(name.c_str());
       return offset;
     }
 
-    void EndClassTag(const char* desc) {
-      DCHECK(desc != nullptr && desc[0] == 'L');
-      // End the class tag.
+    void EndClassTag() {
       info_.EndTag();
-      // Close namespace tags.
-      const char* end;
-      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+    }
+
+    // Set the current namespace nesting to the one required by the given class.
+    // Returns the class name with namespaces, 'L', and ';' stripped.
+    std::string SetNamespaceForClass(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      desc++;  // Skip the initial 'L'.
+      size_t depth = 0;
+      for (const char* end; (end = strchr(desc, '/')) != nullptr; desc = end + 1, ++depth) {
+        // Check whether the name at this depth is already what we need.
+        if (depth < current_namespace_.size()) {
+          const std::string& name = current_namespace_[depth];
+          if (name.compare(0, name.size(), desc, end - desc) == 0) {
+            continue;
+          }
+        }
+        // Otherwise we need to open a new namespace tag at this depth.
+        CloseNamespacesAboveDepth(depth);
+        info_.StartTag(DW_TAG_namespace);
+        std::string name(desc, end - desc);
+        WriteName(name.c_str());
+        current_namespace_.push_back(std::move(name));
+      }
+      CloseNamespacesAboveDepth(depth);
+      return std::string(desc, strchr(desc, ';') - desc);
+    }
+
+    // Close namespace tags to reach the given nesting depth.
+    void CloseNamespacesAboveDepth(size_t depth) {
+      DCHECK_LE(depth, current_namespace_.size());
+      while (current_namespace_.size() > depth) {
         info_.EndTag();
+        current_namespace_.pop_back();
       }
     }
 
     // For access to the ELF sections.
     DebugInfoWriter<ElfTypes>* owner_;
-    // Debug abbrevs for this compilation unit only.
-    std::vector<uint8_t> debug_abbrev_;
     // Temporary buffer to create and store the entries.
     DebugInfoEntryWriter<> info_;
     // Cache of already translated type descriptors.
@@ -1079,10 +1124,14 @@
     // 32-bit references which need to be resolved to a type later.
     // Given type may be used multiple times.  Therefore we need a multimap.
     std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
+    // The current set of open namespace tags which are active and not closed yet.
+    std::vector<std::string> current_namespace_;
   };
 
  public:
-  explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
+  explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder)
+      : builder_(builder),
+        debug_abbrev_(&debug_abbrev_buffer_) {
   }
 
   void Start() {
@@ -1099,25 +1148,26 @@
     writer.Write(types);
   }
 
-  void End() {
+  void End(bool write_oat_patches) {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches",
-                           ArrayRef<const uintptr_t>(debug_info_patches_));
-    builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
-    builder_->WriteSection(".debug_str", &debug_str_.Data());
-    builder_->WriteSection(".debug_loc", &debug_loc_);
-    builder_->WriteSection(".debug_ranges", &debug_ranges_);
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_info.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_info_patches_));
+    }
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_buffer_);
+    if (!debug_loc_.empty()) {
+      builder_->WriteSection(".debug_loc", &debug_loc_);
+    }
+    if (!debug_ranges_.empty()) {
+      builder_->WriteSection(".debug_ranges", &debug_ranges_);
+    }
   }
 
  private:
-  size_t WriteString(const char* str) {
-    return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1);
-  }
-
   ElfBuilder<ElfTypes>* builder_;
   std::vector<uintptr_t> debug_info_patches_;
-  DedupVector debug_abbrev_;
-  DedupVector debug_str_;
+  std::vector<uint8_t> debug_abbrev_buffer_;
+  DebugAbbrevWriter<> debug_abbrev_;
   std::vector<uint8_t> debug_loc_;
   std::vector<uint8_t> debug_ranges_;
 
@@ -1313,10 +1363,12 @@
     return buffer.size();
   }
 
-  void End() {
+  void End(bool write_oat_patches) {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches",
-                           ArrayRef<const uintptr_t>(debug_line_patches));
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_line.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_line_patches));
+    }
   }
 
  private:
@@ -1326,7 +1378,8 @@
 
 template<typename ElfTypes>
 static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                               const ArrayRef<const MethodDebugInfo>& method_infos) {
+                               const ArrayRef<const MethodDebugInfo>& method_infos,
+                               bool write_oat_patches) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1350,7 +1403,7 @@
     for (auto& compilation_unit : compilation_units) {
       line_writer.WriteCompilationUnit(compilation_unit);
     }
-    line_writer.End();
+    line_writer.End(write_oat_patches);
   }
 
   // Write .debug_info section.
@@ -1360,7 +1413,7 @@
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
-    info_writer.End();
+    info_writer.End(write_oat_patches);
   }
 }
 
@@ -1440,13 +1493,14 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                    CFIFormat cfi_format) {
+                    CFIFormat cfi_format,
+                    bool write_oat_patches) {
   // Add methods to .symtab.
   WriteDebugSymbols(builder, method_infos, true /* with_signature */);
   // Generate CFI (stack unwinding information).
-  WriteCFISection(builder, method_infos, cfi_format, true /* write_oat_patches */);
+  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
   // Write DWARF .debug_* sections.
-  WriteDebugSections(builder, method_infos);
+  WriteDebugSections(builder, method_infos, write_oat_patches);
 }
 
 static void XzCompress(const std::vector<uint8_t>* src, std::vector<uint8_t>* dst) {
@@ -1495,20 +1549,20 @@
 }
 
 template <typename ElfTypes>
-void WriteMiniDebugInfo(ElfBuilder<ElfTypes>* parent_builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
-  const InstructionSet isa = parent_builder->GetIsa();
+std::vector<uint8_t> MakeMiniDebugInfoInternal(
+    InstructionSet isa,
+    size_t rodata_section_size,
+    size_t text_section_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   VectorOutputStream out("Mini-debug-info ELF file", &buffer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
   builder->Start();
-  // Write .rodata and .text as NOBITS sections.
-  // This allows tools to detect virtual address relocation of the parent ELF file.
-  builder->SetVirtualAddress(parent_builder->GetRoData()->GetAddress());
-  builder->GetRoData()->WriteNoBitsSection(parent_builder->GetRoData()->GetSize());
-  builder->SetVirtualAddress(parent_builder->GetText()->GetAddress());
-  builder->GetText()->WriteNoBitsSection(parent_builder->GetText()->GetSize());
+  // Mirror .rodata and .text as NOBITS sections.
+  // It is needed to detect relocations after compression.
+  builder->GetRoData()->WriteNoBitsSection(rodata_section_size);
+  builder->GetText()->WriteNoBitsSection(text_section_size);
   WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */);
   WriteCFISection(builder.get(), method_infos, DW_DEBUG_FRAME_FORMAT, false /* write_oat_patches */);
   builder->End();
@@ -1516,7 +1570,19 @@
   std::vector<uint8_t> compressed_buffer;
   compressed_buffer.reserve(buffer.size() / 4);
   XzCompress(&buffer, &compressed_buffer);
-  parent_builder->WriteSection(".gnu_debugdata", &compressed_buffer);
+  return compressed_buffer;
+}
+
+std::vector<uint8_t> MakeMiniDebugInfo(
+    InstructionSet isa,
+    size_t rodata_size,
+    size_t text_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
+  if (Is64BitInstructionSet(isa)) {
+    return MakeMiniDebugInfoInternal<ElfTypes64>(isa, rodata_size, text_size, method_infos);
+  } else {
+    return MakeMiniDebugInfoInternal<ElfTypes32>(isa, rodata_size, text_size, method_infos);
+  }
 }
 
 template <typename ElfTypes>
@@ -1527,10 +1593,12 @@
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
-  builder->Start();
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
   WriteDebugInfo(builder.get(),
                  ArrayRef<const MethodDebugInfo>(&method_info, 1),
-                 DW_DEBUG_FRAME_FORMAT);
+                 DW_DEBUG_FRAME_FORMAT,
+                 false /* write_oat_patches */);
   builder->End();
   CHECK(builder->Good());
   // Make a copy of the buffer.  We want to shrink it anyway.
@@ -1557,12 +1625,12 @@
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
-  builder->Start();
-
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
   DebugInfoWriter<ElfTypes> info_writer(builder.get());
   info_writer.Start();
   info_writer.WriteTypes(types);
-  info_writer.End();
+  info_writer.End(false /* write_oat_patches */);
 
   builder->End();
   CHECK(builder->Good());
@@ -1586,17 +1654,13 @@
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat cfi_format);
+    CFIFormat cfi_format,
+    bool write_oat_patches);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat cfi_format);
-template void WriteMiniDebugInfo<ElfTypes32>(
-    ElfBuilder<ElfTypes32>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
-template void WriteMiniDebugInfo<ElfTypes64>(
-    ElfBuilder<ElfTypes64>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
+    CFIFormat cfi_format,
+    bool write_oat_patches);
 
 }  // namespace dwarf
 }  // namespace art
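
Note on the hunks above: SetNamespaceForClass walks a class descriptor such as "Ljava/lang/String;", opening (or reusing) one DW_TAG_namespace per package component and returning the bare class name. A sketch of just the descriptor parsing, with illustrative names and a plain vector instead of DWARF tags:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Expects a class descriptor of the form "Lpkg/subpkg/Name;".
std::string SplitDescriptor(const char* desc, std::vector<std::string>* namespaces) {
  desc++;  // Skip the initial 'L'.
  for (const char* end; (end = std::strchr(desc, '/')) != nullptr; desc = end + 1) {
    namespaces->push_back(std::string(desc, end - desc));  // One namespace per '/'-separated part.
  }
  return std::string(desc, std::strchr(desc, ';') - desc);  // Class name without the trailing ';'.
}

int main() {
  std::vector<std::string> namespaces;
  std::string name = SplitDescriptor("Ljava/lang/String;", &namespaces);
  std::cout << namespaces[0] << "." << namespaces[1] << "." << name << "\n";  // java.lang.String
}
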
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e19da08..e289197 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -33,11 +33,13 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                    CFIFormat cfi_format);
+                    CFIFormat cfi_format,
+                    bool write_oat_patches);
 
-template <typename ElfTypes>
-void WriteMiniDebugInfo(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos);
+std::vector<uint8_t> MakeMiniDebugInfo(InstructionSet isa,
+                                       size_t rodata_section_size,
+                                       size_t text_section_size,
+                                       const ArrayRef<const MethodDebugInfo>& method_infos);
 
 ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
 
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 6bf080a..864e1b1 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -33,6 +33,8 @@
 #include "leb128.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
+#include "thread-inl.h"
+#include "thread_pool.h"
 #include "utils.h"
 
 namespace art {
@@ -46,6 +48,37 @@
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
+class DebugInfoTask : public Task {
+ public:
+  DebugInfoTask(InstructionSet isa,
+                size_t rodata_section_size,
+                size_t text_section_size,
+                const ArrayRef<const dwarf::MethodDebugInfo>& method_infos)
+      : isa_(isa),
+        rodata_section_size_(rodata_section_size),
+        text_section_size_(text_section_size),
+        method_infos_(method_infos) {
+  }
+
+  void Run(Thread*) {
+    result_ = dwarf::MakeMiniDebugInfo(isa_,
+                                       rodata_section_size_,
+                                       text_section_size_,
+                                       method_infos_);
+  }
+
+  std::vector<uint8_t>* GetResult() {
+    return &result_;
+  }
+
+ private:
+  InstructionSet isa_;
+  size_t rodata_section_size_;
+  size_t text_section_size_;
+  const ArrayRef<const dwarf::MethodDebugInfo>& method_infos_;
+  std::vector<uint8_t> result_;
+};
+
 template <typename ElfTypes>
 class ElfWriterQuick FINAL : public ElfWriter {
  public:
@@ -55,6 +88,9 @@
   ~ElfWriterQuick();
 
   void Start() OVERRIDE;
+  void PrepareDebugInfo(size_t rodata_section_size,
+                        size_t text_section_size,
+                        const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
   OutputStream* StartRoData() OVERRIDE;
   void EndRoData(OutputStream* rodata) OVERRIDE;
   OutputStream* StartText() OVERRIDE;
@@ -75,6 +111,8 @@
   File* const elf_file_;
   std::unique_ptr<BufferedOutputStream> output_stream_;
   std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+  std::unique_ptr<DebugInfoTask> debug_info_task_;
+  std::unique_ptr<ThreadPool> debug_info_thread_pool_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
 };
@@ -147,15 +185,40 @@
 }
 
 template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::PrepareDebugInfo(
+    size_t rodata_section_size,
+    size_t text_section_size,
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (!method_infos.empty() && compiler_options_->GetGenerateMiniDebugInfo()) {
+    // Prepare the mini-debug-info in background while we do other I/O.
+    Thread* self = Thread::Current();
+    debug_info_task_ = std::unique_ptr<DebugInfoTask>(
+        new DebugInfoTask(builder_->GetIsa(),
+                          rodata_section_size,
+                          text_section_size,
+                          method_infos));
+    debug_info_thread_pool_ = std::unique_ptr<ThreadPool>(
+        new ThreadPool("Mini-debug-info writer", 1));
+    debug_info_thread_pool_->AddTask(self, debug_info_task_.get());
+    debug_info_thread_pool_->StartWorkers(self);
+  }
+}
+
+template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
-  if (compiler_options_->GetGenerateDebugInfo()) {
-    // Generate all the debug information we can.
-    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
-  }
-  if (compiler_options_->GetGenerateMiniDebugInfo()) {
-    // Generate only some information and compress it.
-    dwarf::WriteMiniDebugInfo(builder_.get(), method_infos);
+  if (!method_infos.empty()) {
+    if (compiler_options_->GetGenerateDebugInfo()) {
+      // Generate all the debug information we can.
+      dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat, true /* write_oat_patches */);
+    }
+    if (compiler_options_->GetGenerateMiniDebugInfo()) {
+      // Wait for the mini-debug-info generation to finish and write it to disk.
+      Thread* self = Thread::Current();
+      DCHECK(debug_info_thread_pool_ != nullptr);
+      debug_info_thread_pool_->Wait(self, true, false);
+      builder_->WriteSection(".gnu_debugdata", debug_info_task_->GetResult());
+    }
   }
 }
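PrepareDebugInfo hands the expensive mini-debug-info compression to a single-worker ThreadPool while the caller keeps writing .rodata and .text; WriteDebugInfo only blocks once the .gnu_debugdata section is actually emitted. A minimal sketch of the same overlap pattern, assuming std::future in place of ART's ThreadPool and a placeholder payload function:

#include <cstddef>
#include <cstdint>
#include <future>
#include <vector>

// Placeholder for dwarf::MakeMiniDebugInfo (illustration only).
static std::vector<uint8_t> MakeMiniDebugInfoStub(size_t rodata_size, size_t text_size) {
  return std::vector<uint8_t>(rodata_size + text_size, 0);
}

class BackgroundDebugInfo {
 public:
  void Prepare(size_t rodata_size, size_t text_size) {
    // Start the work now; the caller keeps doing file I/O in the meantime.
    result_ = std::async(std::launch::async, MakeMiniDebugInfoStub, rodata_size, text_size);
  }
  std::vector<uint8_t> Collect() {
    // Block only at the point where the payload is actually needed.
    return result_.get();
  }
 private:
  std::future<std::vector<uint8_t>> result_;
};

As in the diff, Collect() is only valid after Prepare(); the real code guards this with a DCHECK on the thread pool.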
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 60dfcfb..73574ba 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -714,7 +714,7 @@
 
 class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
-  bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
     mirror::Class::ComputeName(hs.NewHandle(c));
     return true;
@@ -852,7 +852,7 @@
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
-  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!image_writer_->KeepClass(klass)) {
       classes_to_prune_.insert(klass);
     }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6774758..68f4783 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -60,11 +60,12 @@
   delete reinterpret_cast<JitCompiler*>(handle);
 }
 
-extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self)
+extern "C" bool jit_compile_method(
+    void* handle, ArtMethod* method, Thread* self, bool osr)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
   DCHECK(jit_compiler != nullptr);
-  return jit_compiler->CompileMethod(self, method);
+  return jit_compiler->CompileMethod(self, method, osr);
 }
 
 extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
@@ -201,7 +202,7 @@
   }
 }
 
-bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
+bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
   const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
@@ -223,8 +224,8 @@
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
-    if (success && perf_file_ != nullptr) {
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+    if (success && (perf_file_ != nullptr)) {
       const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 037a18a..5294d0e 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -37,7 +37,7 @@
  public:
   static JitCompiler* Create();
   virtual ~JitCompiler();
-  bool CompileMethod(Thread* self, ArtMethod* method)
+  bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   CompilerCallbacks* GetCompilerCallbacks() const;
   size_t GetTotalCompileTime() const {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index eee6116..c307522 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1227,27 +1227,28 @@
     InductionVarRange::Value v1;
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
-    do {
-      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-        ValueRange index_range(GetGraph()->GetArena(),
-                               ValueBound(v1.instruction, v1.b_constant),
-                               ValueBound(v2.instruction, v2.b_constant));
-        // If analysis reveals a certain OOB, disable dynamic BCE.
-        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
-            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
-          *try_dynamic_bce = false;
-          return false;
+    if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) {
+      do {
+        if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+            v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+          DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+          DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+          ValueRange index_range(GetGraph()->GetArena(),
+                                 ValueBound(v1.instruction, v1.b_constant),
+                                 ValueBound(v2.instruction, v2.b_constant));
+          // If analysis reveals a certain OOB, disable dynamic BCE.
+          if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+              index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+            *try_dynamic_bce = false;
+            return false;
+          }
+          // Use analysis for static bce only if loop is finite.
+          if (!needs_finite_test && index_range.FitsIn(array_range)) {
+            return true;
+          }
         }
-        // Use analysis for static bce only if loop is finite.
-        if (!needs_finite_test && index_range.FitsIn(array_range)) {
-          return true;
-        }
-      }
-    } while (induction_range_.RefineOuter(&v1, &v2));
+      } while (induction_range_.RefineOuter(&v1, &v2));
+    }
     return false;
   }
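The change above makes the static analysis conditional on GetInductionRange() succeeding before the refinement loop runs. A compact sketch of the resulting decision, assuming illustrative Range/FitsIn helpers rather than the ART ValueBound/ValueRange classes:

#include <optional>

struct Range { int lower; int upper; };

// True when every possible index lies inside the array bounds, so the
// bounds check can be removed statically.
static bool FitsIn(const Range& index, const Range& array) {
  return index.lower >= array.lower && index.upper <= array.upper;
}

static bool CanEliminateStatically(const std::optional<Range>& induction_range,
                                   const Range& array_range,
                                   bool needs_finite_test) {
  if (!induction_range.has_value()) {
    return false;  // analysis failed: keep the check (or try dynamic BCE)
  }
  return !needs_finite_test && FitsIn(*induction_range, array_range);
}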
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7..8d77daf 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@
   size_t index_;
 };
 
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed off of the given instruction, and complicated
-  // expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.resize(count);
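The SwitchTable helper removed here is used later by the code generator's new OSR coverage check (CheckLoopEntriesCanBeUsedForOsr constructs one); its destination file is outside this section. It wraps the two dex switch payload layouts: packed-switch stores one starting key plus N targets, sparse-switch stores N keys followed by N targets. An illustrative decoder under those layout rules; the struct and function names are stand-ins and alignment/endianness handling is simplified:

#include <cstddef>
#include <cstdint>

struct SwitchPayload {
  const uint16_t* data;  // points at the ident halfword of the payload
};

static int32_t ReadInt32(const uint16_t* halfwords, size_t index) {
  return static_cast<int32_t>(halfwords[2 * index]) |
         (static_cast<int32_t>(halfwords[2 * index + 1]) << 16);
}

// Packed payload: ident, size, first_key (two halfwords), then size targets.
static int32_t PackedTargetAt(const SwitchPayload& payload, size_t i) {
  const uint16_t* targets = payload.data + 4;
  return ReadInt32(targets, i);
}

// Sparse payload: ident, size, then size keys followed by size targets.
static int32_t SparseTargetAt(const SwitchPayload& payload, size_t i) {
  const uint16_t num_entries = payload.data[1];
  const uint16_t* keys = payload.data + 2;
  const uint16_t* targets = keys + 2 * num_entries;
  return ReadInt32(targets, i);
}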
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7..93e17d6 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@
 namespace art {
 
 class Instruction;
-class SwitchTable;
 
 class HGraphBuilder : public ValueObject {
  public:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdb..e1b83f0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,76 @@
   return stack_map_stream_.PrepareForFillIn();
 }
 
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+                        const HGraph& graph,
+                        const CodeInfo& code_info,
+                        const ArenaVector<HSuspendCheck*>& loop_headers,
+                        ArenaVector<size_t>* covered) {
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+  for (size_t i = 0; i < loop_headers.size(); ++i) {
+    if (loop_headers[i]->GetDexPc() == dex_pc) {
+      if (graph.IsCompilingOsr()) {
+        DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+      }
+      ++(*covered)[i];
+    }
+  }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+                                            const CodeInfo& code_info,
+                                            const DexFile::CodeItem& code_item) {
+  if (graph.HasTryCatch()) {
+    // One can write loops through try/catch, which we do not support for OSR anyway.
+    return;
+  }
+  ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+  for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+    if (it.Current()->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+      if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+        loop_headers.push_back(suspend_check);
+      }
+    }
+  }
+  ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+  size_t dex_pc = 0;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    if (instruction.IsBranch()) {
+      uint32_t target = dex_pc + instruction.GetTargetOffset();
+      CheckCovers(target, graph, code_info, loop_headers, &covered);
+    } else if (instruction.IsSwitch()) {
+      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+      uint16_t num_entries = table.GetNumEntries();
+      size_t offset = table.GetFirstValueIndex();
+
+      // Use a larger loop counter type to avoid overflow issues.
+      for (size_t i = 0; i < num_entries; ++i) {
+        // The target of the case.
+        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+        CheckCovers(target, graph, code_info, loop_headers, &covered);
+      }
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  for (size_t i = 0; i < covered.size(); ++i) {
+    DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+  }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
   stack_map_stream_.FillIn(region);
+  if (kIsDebugBuild) {
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+  }
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +773,46 @@
 
   EmitEnvironment(instruction->GetEnvironment(), slow_path);
   stack_map_stream_.EndStackMapEntry();
+
+  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+  if (instruction->IsSuspendCheck() &&
+      (info != nullptr) &&
+      graph_->IsCompilingOsr() &&
+      (inlining_depth == 0)) {
+    DCHECK_EQ(info->GetSuspendCheck(), instruction);
+    // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+    // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+    DCHECK(info->IsIrreducible());
+    stack_map_stream_.BeginStackMapEntry(
+        dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+    EmitEnvironment(instruction->GetEnvironment(), slow_path);
+    stack_map_stream_.EndStackMapEntry();
+    if (kIsDebugBuild) {
+      HEnvironment* environment = instruction->GetEnvironment();
+      for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+        HInstruction* in_environment = environment->GetInstructionAt(i);
+        if (in_environment != nullptr) {
+          DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+          Location location = environment->GetLocationAt(i);
+          DCHECK(location.IsStackSlot() ||
+                 location.IsDoubleStackSlot() ||
+                 location.IsConstant() ||
+                 location.IsInvalid());
+          if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+            DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+          }
+        }
+      }
+    }
+  } else if (kIsDebugBuild) {
+    // Ensure stack maps are unique, by checking that the native pc in the stack map
+    // last emitted is different than the native pc of the stack map just emitted.
+    size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+    if (number_of_stack_maps > 1) {
+      DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+                stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+    }
+  }
 }
 
 bool CodeGenerator::HasStackMapAtCurrentPc() {
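CheckLoopEntriesCanBeUsedForOsr walks the dex bytecode and verifies that every non-inlined loop header in the compiled graph is the target of some dex branch or switch entry, since OSR transfers in at exactly those dex pcs. A standalone sketch of that coverage check, with plain containers standing in for the graph and code-item walk:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

static void CheckLoopEntriesCovered(const std::vector<uint32_t>& loop_header_dex_pcs,
                                    const std::vector<uint32_t>& branch_target_dex_pcs) {
  std::unordered_map<uint32_t, size_t> covered;
  for (uint32_t pc : loop_header_dex_pcs) {
    covered[pc] = 0;
  }
  for (uint32_t target : branch_target_dex_pcs) {
    auto it = covered.find(target);
    if (it != covered.end()) {
      ++it->second;  // this loop entry can be reached from interpreted code
    }
  }
  for (const auto& entry : covered) {
    assert(entry.second != 0 && "Loop in compiled code has no dex branch equivalent");
    (void)entry;  // silence unused warning when asserts are compiled out
  }
}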
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146..0a688cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildStackMaps(MemoryRegion region);
+  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
 
   bool IsLeafMethod() const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c2d9edd..e434932 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3750,6 +3750,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -3779,6 +3780,13 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   Condition less_cond;
   switch (type) {
+    case Primitive::kPrimInt: {
+      __ LoadImmediate(out, 0);
+      __ cmp(left.AsRegister<Register>(),
+             ShifterOperand(right.AsRegister<Register>()));  // Signed compare.
+      less_cond = LT;
+      break;
+    }
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
              ShifterOperand(right.AsRegisterPairHigh<Register>()));  // Signed compare.
@@ -3808,6 +3816,7 @@
       LOG(FATAL) << "Unexpected compare type " << type;
       UNREACHABLE();
   }
+
   __ b(&done, EQ);
   __ b(&less, less_cond);
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a59024e..e20e044 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1556,21 +1556,13 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (use_acquire_release) {
-    // TODO(vixl): Let the MacroAssembler handle MemOperand.
-    __ Add(temp, class_reg, status_offset);
-    __ Ldar(temp, HeapOperand(temp));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-  } else {
-    __ Ldr(temp, HeapOperand(class_reg, status_offset));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-    __ Dmb(InnerShareable, BarrierReads);
-  }
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp, class_reg, status_offset);
+  __ Ldar(temp, HeapOperand(temp));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1716,9 +1708,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
-
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object FieldGet with Baker's read barrier case.
@@ -1736,26 +1726,15 @@
         offset,
         temp,
         /* needs_null_check */ true,
-        field_info.IsVolatile() && use_acquire_release);
-    if (field_info.IsVolatile() && !use_acquire_release) {
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-    }
+        field_info.IsVolatile());
   } else {
     // General case.
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        // Note that a potential implicit null check is handled in this
-        // CodeGeneratorARM64::LoadAcquire call.
-        // NB: LoadAcquire will record the pc info if needed.
-        codegen_->LoadAcquire(
-            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
-      } else {
-        codegen_->Load(field_type, OutputCPURegister(instruction), field);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        // For IRIW sequential consistency kLoadAny is not sufficient.
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      // Note that a potential implicit null check is handled in this
+      // CodeGeneratorARM64::LoadAcquire call.
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(
+          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1791,7 +1770,6 @@
   CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -1807,15 +1785,8 @@
     }
 
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-      } else {
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-        codegen_->Store(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -2437,6 +2408,7 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   Primitive::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
@@ -2465,14 +2437,14 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       Register result = OutputRegister(compare);
       Register left = InputRegisterAt(compare, 0);
       Operand right = InputOperandAt(compare, 1);
-
       __ Cmp(left, right);
-      __ Cset(result, ne);
-      __ Cneg(result, result, lt);
+      __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
+      __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
       break;
     }
     case Primitive::kPrimFloat:
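The ARM64 changes above drop the PreferAcquireRelease() feature check: volatile field reads now always use a load-acquire (Ldar) and volatile writes a store-release (Stlr), with no separate Dmb barriers. A sketch of the ordering contract this relies on, expressed with std::atomic rather than the ART assembler API:

#include <atomic>
#include <cstdint>

struct VolatileField {
  std::atomic<int32_t> value{0};
};

static int32_t VolatileGet(const VolatileField& f) {
  // Compiles to an ldar on AArch64: no later access may be reordered before it.
  return f.value.load(std::memory_order_acquire);
}

static void VolatileSet(VolatileField& f, int32_t v) {
  // Compiles to an stlr on AArch64: no earlier access may be reordered after it.
  f.value.store(v, std::memory_order_release);
}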
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index fa119bb..e9c0b6a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1132,11 +1132,11 @@
 }
 
 void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromCoreRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromFRegister(FRegister(reg));
+  stream << FRegister(reg);
 }
 
 void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -2123,6 +2123,7 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
 
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -2153,6 +2154,14 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt: {
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
+      __ Slt(TMP, lhs, rhs);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
+      break;
+    }
     case Primitive::kPrimLong: {
       MipsLabel done;
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
@@ -5287,12 +5296,27 @@
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
 }
 
-void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMipsPointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+  }
+  __ LoadFromOffset(kLoadWord,
+                    locations->Out().AsRegister<Register>(),
+                    locations->InAt(0).AsRegister<Register>(),
+                    method_offset);
 }
 
 #undef __
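For the new kPrimInt case of VisitCompare on MIPS, the three-way result is computed without branches: two set-less-than results are subtracted. The reference semantics in plain C++ (illustrative, not the generated code):

#include <cstdint>

static int32_t CompareInt(int32_t lhs, int32_t rhs) {
  const int32_t lt = (lhs < rhs) ? 1 : 0;  // Slt TMP, lhs, rhs
  const int32_t gt = (rhs < lhs) ? 1 : 0;  // Slt res, rhs, lhs
  return gt - lt;                          // Subu res, res, TMP -> -1, 0, or +1
}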
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3c928de..da98a89 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -106,7 +106,7 @@
 }
 
 #define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
@@ -437,7 +437,7 @@
 
 #undef __
 #define __ down_cast<Mips64Assembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -486,12 +486,12 @@
 void ParallelMoveResolverMIPS64::RestoreScratch(int reg) {
   // Pop reg
   __ Ld(GpuRegister(reg), SP, 0);
-  __ DecreaseFrameSize(kMips64WordSize);
+  __ DecreaseFrameSize(kMips64DoublewordSize);
 }
 
 void ParallelMoveResolverMIPS64::SpillScratch(int reg) {
   // Push reg
-  __ IncreaseFrameSize(kMips64WordSize);
+  __ IncreaseFrameSize(kMips64DoublewordSize);
   __ Sd(GpuRegister(reg), SP, 0);
 }
 
@@ -503,7 +503,7 @@
   // automatically unspilled when the scratch scope object is destroyed).
   ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
   // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
-  int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0;
+  int stack_offset = ensure_scratch.IsSpilled() ? kMips64DoublewordSize : 0;
   __ LoadFromOffset(load_type,
                     GpuRegister(ensure_scratch.GetRegister()),
                     SP,
@@ -523,7 +523,9 @@
   return dwarf::Reg::Mips64Core(static_cast<int>(reg));
 }
 
-// TODO: mapping of floating-point registers to DWARF
+static dwarf::Reg DWARFReg(FpuRegister reg) {
+  return dwarf::Reg::Mips64Fp(static_cast<int>(reg));
+}
 
 void CodeGeneratorMIPS64::GenerateFrameEntry() {
   __ Bind(&frame_entry_label_);
@@ -562,7 +564,7 @@
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     GpuRegister reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sd(reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
@@ -571,9 +573,9 @@
   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
     FpuRegister reg = kFpuCalleeSaves[i];
     if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sdc1(reg, SP, ofs);
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
+      __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
@@ -609,8 +611,8 @@
       FpuRegister reg = kFpuCalleeSaves[i];
       if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
         __ Ldc1(reg, SP, ofs);
-        ofs += kMips64WordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
+        ofs += kMips64DoublewordSize;
+        __ cfi().Restore(DWARFReg(reg));
       }
     }
 
@@ -618,7 +620,7 @@
       GpuRegister reg = kCoreCalleeSaves[i];
       if (allocated_registers_.ContainsCoreRegister(reg)) {
         __ Ld(reg, SP, ofs);
-        ofs += kMips64WordSize;
+        ofs += kMips64DoublewordSize;
         __ cfi().Restore(DWARFReg(reg));
       }
     }
@@ -976,7 +978,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMips64WordSize>().Int32Value());
+                    Thread::CardTableOffset<kMips64DoublewordSize>().Int32Value());
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
@@ -994,10 +996,11 @@
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[RA] = true;
 
-  // AT and TMP(T8) are used as temporary/scratch registers
-  // (similar to how AT is used by MIPS assemblers).
+  // AT, TMP(T8) and TMP2(T3) are used as temporary/scratch
+  // registers (similar to how AT is used by MIPS assemblers).
   blocked_core_registers_[AT] = true;
   blocked_core_registers_[TMP] = true;
+  blocked_core_registers_[TMP2] = true;
   blocked_fpu_registers_[FTMP] = true;
 
   // Reserve suspend and thread registers.
@@ -1021,22 +1024,22 @@
 
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1051,7 +1054,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMips64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMips64DoublewordSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1091,7 +1094,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMips64DoublewordSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnezc(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -1760,6 +1763,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
 
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
@@ -1788,16 +1792,25 @@
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
       Location rhs_location = locations->InAt(1);
       bool use_imm = rhs_location.IsConstant();
       GpuRegister rhs = ZERO;
       if (use_imm) {
-        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
-        if (value != 0) {
-          rhs = AT;
-          __ LoadConst64(rhs, value);
+        if (in_type == Primitive::kPrimInt) {
+          int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst32(rhs, value);
+          }
+        } else {
+          int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+          if (value != 0) {
+            rhs = AT;
+            __ LoadConst64(rhs, value);
+          }
         }
       } else {
         rhs = rhs_location.AsRegister<GpuRegister>();
@@ -3014,7 +3027,7 @@
       invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -3190,7 +3203,7 @@
                         T9,
                         callee_method.AsRegister<GpuRegister>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMips64WordSize).Int32Value());
+                            kMips64DoublewordSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3228,7 +3241,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
@@ -3306,7 +3319,7 @@
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMips64WordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -3546,7 +3559,8 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+    MemberOffset code_offset =
+        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
     __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);
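With kMips64WordSize renamed to kMips64DoublewordSize, each callee-saved core or FP register visibly consumes an 8-byte slot, and the FP saves additionally get CFI RelOffset/Restore records. A small model of the spill-offset bookkeeping, where the vector of (register, offset) pairs stands in for the __ cfi() calls and all names are illustrative:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

static constexpr size_t kDoublewordSize = 8;  // mirrors kMips64DoublewordSize

static std::vector<std::pair<int, int>> RecordCalleeSaveOffsets(
    const std::vector<int>& callee_saves, size_t frame_size) {
  std::vector<std::pair<int, int>> cfi_rel_offsets;
  int ofs = static_cast<int>(frame_size);
  // Registers are spilled from the top of the frame downwards, one doubleword each.
  for (auto it = callee_saves.rbegin(); it != callee_saves.rend(); ++it) {
    ofs -= static_cast<int>(kDoublewordSize);
    cfi_rel_offsets.emplace_back(*it, ofs);  // corresponds to __ cfi().RelOffset(reg, ofs)
  }
  return cfi_rel_offsets;
}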
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 08e5615..c836f83 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -27,10 +27,6 @@
 namespace art {
 namespace mips64 {
 
-// Use a local definition to prevent copying mistakes.
-static constexpr size_t kMips64WordSize = kMips64PointerSize;
-
-
 // InvokeDexCallingConvention registers
 
 static constexpr GpuRegister kParameterCoreRegisters[] =
@@ -274,9 +270,9 @@
 
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
 
-  size_t GetWordSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
 
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 18d70da..de62010 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1350,11 +1350,7 @@
     int32_t val_high = High32Bits(value);
     int32_t val_low = Low32Bits(value);
 
-    if (val_high == 0) {
-      __ testl(left_high, left_high);
-    } else {
-      __ cmpl(left_high, Immediate(val_high));
-    }
+    codegen_->Compare32BitValue(left_high, val_high);
     if (if_cond == kCondNE) {
       __ j(X86Condition(true_high_cond), true_label);
     } else if (if_cond == kCondEQ) {
@@ -1364,11 +1360,7 @@
       __ j(X86Condition(false_high_cond), false_label);
     }
     // Must be equal high, so compare the lows.
-    if (val_low == 0) {
-      __ testl(left_low, left_low);
-    } else {
-      __ cmpl(left_low, Immediate(val_low));
-    }
+    codegen_->Compare32BitValue(left_low, val_low);
   } else {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
@@ -1389,6 +1381,40 @@
   __ j(final_condition, true_label);
 }
 
+void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
+                                                    Location rhs,
+                                                    HInstruction* insn,
+                                                    bool is_double) {
+  HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
+  if (is_double) {
+    if (rhs.IsFpuRegister()) {
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+    } else if (const_area != nullptr) {
+      DCHECK(const_area->IsEmittedAtUseSite());
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(
+                   const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                   const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+    } else {
+      DCHECK(rhs.IsDoubleStackSlot());
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+    }
+  } else {
+    if (rhs.IsFpuRegister()) {
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+    } else if (const_area != nullptr) {
+      DCHECK(const_area->IsEmittedAtUseSite());
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(
+                   const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                   const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+    } else {
+      DCHECK(rhs.IsStackSlot());
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
+    }
+  }
+}
+
 template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
                                                                LabelType* true_target_in,
@@ -1409,11 +1435,11 @@
       GenerateLongComparesAndJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimFloat:
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, condition, false);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     case Primitive::kPrimDouble:
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, condition, true);
       GenerateFPJumps(condition, true_target, false_target);
       break;
     default:
@@ -1513,11 +1539,7 @@
       __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
     } else if (rhs.IsConstant()) {
       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-      }
+      codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
     } else {
       __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
     }
@@ -1665,7 +1687,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
+      } else if (cond->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
@@ -1704,11 +1732,7 @@
         __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
       } else if (rhs.IsConstant()) {
         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-        }
+        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
@@ -1719,11 +1743,11 @@
       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimFloat:
-      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(lhs, rhs, cond, false);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
     case Primitive::kPrimDouble:
-      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(lhs, rhs, cond, true);
       GenerateFPJumps(cond, &true_label, &false_label);
       break;
   }
@@ -2159,6 +2183,32 @@
   }
 }
 
+void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  DCHECK(Primitive::IsFloatingPointType(neg->GetType()));
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresFpuRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  DCHECK(locations->InAt(0).Equals(out));
+
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+  XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  if (neg->GetType() == Primitive::kPrimFloat) {
+    __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area));
+    __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
+  } else {
+    __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area));
+    __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
+  }
+}
+
 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
@@ -2688,6 +2738,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
+      } else if (add->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -2804,6 +2856,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
+      } else if (sub->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -2918,6 +2972,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
+      } else if (mul->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -3415,6 +3471,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
+      } else if (div->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -4069,6 +4127,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -4078,7 +4137,13 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
+        DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
+      } else if (compare->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -4094,7 +4159,21 @@
   Location right = locations->InAt(1);
 
   NearLabel less, greater, done;
+  Condition less_cond = kLess;
+
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt: {
+      Register left_reg = left.AsRegister<Register>();
+      if (right.IsConstant()) {
+        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+        codegen_->Compare32BitValue(left_reg, value);
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<Register>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       Register left_low = left.AsRegisterPairLow<Register>();
       Register left_high = left.AsRegisterPairHigh<Register>();
@@ -4116,11 +4195,7 @@
         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_high == 0) {
-          __ testl(left_high, left_high);
-        } else {
-          __ cmpl(left_high, Immediate(val_high));
-        }
+        codegen_->Compare32BitValue(left_high, val_high);
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
@@ -4130,30 +4205,30 @@
         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
         DCHECK(right_is_const) << right;
-        if (val_low == 0) {
-          __ testl(left_low, left_low);
-        } else {
-          __ cmpl(left_low, Immediate(val_low));
-        }
+        codegen_->Compare32BitValue(left_low, val_low);
       }
+      less_cond = kBelow;  // for CF (unsigned).
       break;
     }
     case Primitive::kPrimFloat: {
-      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, compare, false);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     case Primitive::kPrimDouble: {
-      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPCompare(left, right, compare, true);
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  // for CF (floats).
       break;
     }
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(kBelow, &less);  // kBelow is for CF (unsigned & floats).
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
@@ -7113,6 +7188,22 @@
   return Address(reg, kDummy32BitOffset, fixup);
 }
 
+void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ xorl(dest, dest);
+  } else {
+    __ movl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
+
 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                            Register reg,
                                            Register value) {
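VisitX86FPNeg negates a float or double by XOR-ing only the sign bit with a mask loaded from the constant area (xorps/xorpd). A bit-level model of that transformation; memcpy is used to stay within strict-aliasing rules:

#include <cstdint>
#include <cstring>

static float NegateFloatViaXor(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= UINT32_C(0x80000000);           // flip the sign bit, like xorps with the mask
  std::memcpy(&value, &bits, sizeof(bits));
  return value;
}

static double NegateDoubleViaXor(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= UINT64_C(0x8000000000000000);   // xorpd with the 64-bit mask
  std::memcpy(&value, &bits, sizeof(bits));
  return value;
}

Unlike computing 0 - x, this flips the sign of zeros and NaNs as well, matching the xorps/xorpd behaviour.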
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0aef478..45e8ffa 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -296,6 +296,8 @@
                                    HBasicBlock* switch_block,
                                    HBasicBlock* default_block);
 
+  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
 
@@ -450,6 +452,12 @@
   Address LiteralInt32Address(int32_t v, Register reg);
   Address LiteralInt64Address(int64_t v, Register reg);
 
+  // Load a 32-bit value into a register in the most efficient manner.
+  void Load32BitValue(Register dest, int32_t value);
+
+  // Compare a register with a 32-bit value in the most efficient manner.
+  void Compare32BitValue(Register dest, int32_t value);
+
   Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6795488..99396cd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1126,30 +1126,43 @@
     return;
   }
   if (destination.IsRegister()) {
+    CpuRegister dest = destination.AsRegister<CpuRegister>();
     if (source.IsRegister()) {
-      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+      __ movq(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movd(dest, source.AsFpuRegister<XmmRegister>());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      if (constant->IsLongConstant()) {
+        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
+      } else {
+        Load32BitValue(dest, GetInt32ValueOf(constant));
+      }
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.AsRegister<CpuRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
+    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
     if (source.IsRegister()) {
-      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>());
+      __ movd(dest, source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+      if (constant->IsFloatConstant()) {
+        Load32BitValue(dest, static_cast<int32_t>(value));
+      } else {
+        Load64BitValue(dest, value);
+      }
     } else if (source.IsStackSlot()) {
-      __ movss(destination.AsFpuRegister<XmmRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movsd(destination.AsFpuRegister<XmmRegister>(),
-               Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
@@ -1345,42 +1358,44 @@
   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
 }
 
-template<class LabelType>
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
-                                                                  LabelType* true_target_in,
-                                                                  LabelType* false_target_in) {
-  // Generated branching requires both targets to be explicit. If either of the
-  // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
-  LabelType fallthrough_target;
-  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
-  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
-
+void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
   LocationSummary* locations = condition->GetLocations();
+
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
+        if (value == 0) {
+          __ testl(left_reg, left_reg);
+        } else {
+          __ cmpl(left_reg, Immediate(value));
+        }
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in a 32-bit integer.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
       }
-      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1395,7 +1410,6 @@
         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
-      GenerateFPJumps(condition, true_target, false_target);
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1410,6 +1424,38 @@
         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
                    Address(CpuRegister(RSP), right.GetStackIndex()));
       }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected condition type " << type;
+  }
+}
+
+template<class LabelType>
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+                                                                  LabelType* true_target_in,
+                                                                  LabelType* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
+  // Generate the comparison to set the CC.
+  GenerateCompareTest(condition);
+
+  // Now generate the correct jump(s).
+  Primitive::Type type = condition->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong: {
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    }
+    case Primitive::kPrimDouble: {
       GenerateFPJumps(condition, true_target, false_target);
       break;
     }
@@ -1508,11 +1554,7 @@
       __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
     } else if (rhs.IsConstant()) {
       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-      } else {
-        __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-      }
+      codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
     } else {
       __ cmpl(lhs.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), rhs.GetStackIndex()));
@@ -1564,14 +1606,37 @@
                                /* false_target */ nullptr);
 }
 
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // An FP condition doesn't generate the single CC that we need.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition() &&
+      Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
+    return false;
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    // Since we can't use CMOV, there is no need to force 'true' into a register.
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      locations->SetInAt(1, Location::RequiresRegister());
+    } else {
+      // Since we can't use CMOV, there is no need to force 'true' into a register.
+      locations->SetInAt(1, Location::Any());
+    }
   }
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1581,13 +1646,52 @@
 
 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(select,
-                                   /* condition_input_index */ 2,
-                                   /* true_target */ nullptr,
-                                   &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+    CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
+    CpuRegister value_true = locations->InAt(1).AsRegister<CpuRegister>();
+    DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction.  Condition codes are right.
+          cond = X86_64IntegerCondition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        GenerateCompareTest(condition);
+        cond = X86_64IntegerCondition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    // Generate a correctly sized CMOV.
+    __ cmov(cond, value_false, value_true, select->GetType() == Primitive::kPrimLong);
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(select,
+                                     /* condition_input_index */ 2,
+                                     /* true_target */ nullptr,
+                                     &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
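
The CMOV path above is a branchless select: the output register is the same location as the 'false' input, and the conditional move overwrites it with the 'true' value only when the condition holds. Roughly equivalent C++ (illustration only, not ART code):

    #include <cstdint>
    #include <iostream>

    int64_t SelectViaCmov(bool condition, int64_t true_value, int64_t false_value) {
      int64_t out = false_value;   // Out is the same location as the 'false' input.
      if (condition) {             // cmov: a conditional register-to-register move.
        out = true_value;
      }
      return out;
    }

    int main() {
      std::cout << SelectViaCmov(true, 1, 2) << " "
                << SelectViaCmov(false, 1, 2) << "\n";  // 1 2
      return 0;
    }
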
@@ -1691,11 +1795,7 @@
         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-        }
+        codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
       } else {
         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1709,16 +1809,7 @@
         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-          } else {
-            __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
       } else {
         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
@@ -1850,6 +1941,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -1876,21 +1968,26 @@
 
   NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond = kLess;
+
   switch (type) {
+    case Primitive::kPrimInt: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
+        codegen_->Compare32BitValue(left_reg, value);
+      } else if (right.IsStackSlot()) {
+        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
+      }
+      break;
+    }
     case Primitive::kPrimLong: {
       CpuRegister left_reg = left.AsRegister<CpuRegister>();
       if (right.IsConstant()) {
         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-        if (IsInt<32>(value)) {
-          if (value == 0) {
-            __ testq(left_reg, left_reg);
-          } else {
-            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
-          }
-        } else {
-          // Value won't fit in an int.
-          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
-        }
+        codegen_->Compare64BitValue(left_reg, value);
       } else if (right.IsDoubleStackSlot()) {
         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
       } else {
@@ -1909,6 +2006,7 @@
         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  //  ucomis{s,d} sets CF
       break;
     }
     case Primitive::kPrimDouble: {
@@ -1922,14 +2020,16 @@
         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
       }
       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      less_cond = kBelow;  //  ucomis{s,d} sets CF
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
   }
+
   __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(type == Primitive::kPrimLong ? kLess : kBelow, &less);  //  ucomis{s,d} sets CF (kBelow)
+  __ j(less_cond, &less);
 
   __ Bind(&greater);
   __ movl(out, Immediate(1));
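
For float/double compares, ucomiss/ucomisd set CF when the left operand is below the right one, which is why the "less" jump has to use the unsigned kBelow condition, while unordered (NaN) inputs jump straight to the +1 or -1 result according to the compare's gt/lt bias. A small sketch of the value this lowering produces (illustration only, not ART code):

    #include <cmath>
    #include <iostream>

    int CompareFP(double left, double right, bool gt_bias) {
      if (std::isnan(left) || std::isnan(right)) {  // "Unordered" after ucomisd.
        return gt_bias ? 1 : -1;
      }
      if (left == right) return 0;
      return left < right ? -1 : 1;                 // kBelow picks the CF == 1 case.
    }

    int main() {
      std::cout << CompareFP(1.0, 2.0, true) << " "
                << CompareFP(2.0, 1.0, true) << " "
                << CompareFP(1.0, 1.0, true) << " "
                << CompareFP(NAN, 1.0, true) << "\n";  // -1 1 0 1
      return 0;
    }
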
@@ -2750,11 +2850,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2768,11 +2864,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2786,11 +2878,7 @@
           } else if (in.IsConstant()) {
             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int64_t, double>(v) == 0) {
-              __ xorps(dest, dest);
-            } else {
-              __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
-            }
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -2817,11 +2905,7 @@
           } else if (in.IsConstant()) {
             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
@@ -2835,11 +2919,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (v == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -2853,11 +2933,7 @@
           } else if (in.IsConstant()) {
             float v = in.GetConstant()->AsFloatConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            if (bit_cast<int32_t, float>(v) == 0) {
-              __ xorpd(dest, dest);
-            } else {
-              __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
-            }
+            codegen_->Load64BitValue(dest, static_cast<double>(v));
           } else {
             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()));
@@ -5196,18 +5272,12 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<int32_t, float>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          // easy FP 0.0.
-          __ xorps(dest, dest);
-        } else {
-          __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
-        }
+        codegen_->Load32BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
-        Immediate imm(value);
+        Immediate imm(bit_cast<int32_t, float>(fp_value));
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
@@ -5216,11 +5286,7 @@
       int64_t value = bit_cast<int64_t, double>(fp_value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
-        if (value == 0) {
-          __ xorpd(dest, dest);
-        } else {
-          __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
-        }
+        codegen_->Load64BitValue(dest, fp_value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         codegen_->Store64BitValueToStack(destination, value);
@@ -6467,6 +6533,51 @@
   }
 }
 
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
+  if (value == 0) {
+    __ xorps(dest, dest);
+  } else {
+    __ movss(dest, LiteralInt32Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
+  if (value == 0) {
+    __ xorpd(dest, dest);
+  } else {
+    __ movsd(dest, LiteralInt64Address(value));
+  }
+}
+
+void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
+  Load32BitValue(dest, bit_cast<int32_t, float>(value));
+}
+
+void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
+  Load64BitValue(dest, bit_cast<int64_t, double>(value));
+}
+
+void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
+  if (value == 0) {
+    __ testl(dest, dest);
+  } else {
+    __ cmpl(dest, Immediate(value));
+  }
+}
+
+void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
+  if (IsInt<32>(value)) {
+    if (value == 0) {
+      __ testq(dest, dest);
+    } else {
+      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
+    }
+  } else {
+    // Value won't fit in a 32-bit immediate.
+    __ cmpq(dest, LiteralInt64Address(value));
+  }
+}
+
 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
   DCHECK(dest.IsDoubleStackSlot());
   if (IsInt<32>(value)) {
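
Compare64BitValue above folds the previously open-coded pattern: `cmpq` only accepts a sign-extended 32-bit immediate, so constants outside that range are compared against a literal placed in the constant area. The range check that IsInt<32> performs is conceptually (illustration only, not ART code):

    #include <cassert>
    #include <cstdint>

    bool FitsInInt32(int64_t value) {
      return value >= INT32_MIN && value <= INT32_MAX;
    }

    int main() {
      assert(FitsInInt32(0));
      assert(FitsInInt32(-1));
      assert(FitsInInt32(INT32_MAX));
      assert(!FitsInInt32(static_cast<int64_t>(INT32_MAX) + 1));  // Needs the literal pool.
      assert(!FitsInInt32(static_cast<int64_t>(INT32_MIN) - 1));
      return 0;
    }
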
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 318087e..72dddfd 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -264,6 +264,7 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
+  void GenerateCompareTest(HCondition* condition);
   template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
@@ -478,9 +479,17 @@
   Address LiteralInt32Address(int32_t v);
   Address LiteralInt64Address(int64_t v);
 
-  // Load a 32/64 bit value into a register in the most efficient manner.
+  // Load a 32/64-bit value into a register in the most efficient manner.
   void Load32BitValue(CpuRegister dest, int32_t value);
   void Load64BitValue(CpuRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, int32_t value);
+  void Load64BitValue(XmmRegister dest, int64_t value);
+  void Load32BitValue(XmmRegister dest, float value);
+  void Load64BitValue(XmmRegister dest, double value);
+
+  // Compare a register with a 32/64-bit value in the most efficient manner.
+  void Compare32BitValue(CpuRegister dest, int32_t value);
+  void Compare64BitValue(CpuRegister dest, int64_t value);
 
   Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index ae15fcf..9566c29 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -93,7 +93,7 @@
   DCHECK(induction_analysis != nullptr);
 }
 
-void InductionVarRange::GetInductionRange(HInstruction* context,
+bool InductionVarRange::GetInductionRange(HInstruction* context,
                                           HInstruction* instruction,
                                           /*out*/Value* min_val,
                                           /*out*/Value* max_val,
@@ -111,12 +111,9 @@
     *min_val = GetVal(info, trip, in_body, /* is_min */ true);
     *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-  } else {
-    // No loop to analyze.
-    *min_val = Value();
-    *max_val = Value();
-    *needs_finite_test = false;
+    return true;
   }
+  return false;  // Nothing known.
 }
 
 bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const {
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 974b8fb..3cb7b4b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -60,13 +60,13 @@
    * Given a context denoted by the first instruction, returns a possibly conservative
    * lower and upper bound on the instruction's value in the output parameters min_val
    * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test
-   * is needed to protect the range evaluation inside its loop.
+   * is needed to protect the range evaluation inside its loop. Returns false on failure.
    */
-  void GetInductionRange(HInstruction* context,
+  bool GetInductionRange(HInstruction* context,
                          HInstruction* instruction,
-                         /*out*/Value* min_val,
-                         /*out*/Value* max_val,
-                         /*out*/bool* needs_finite_test);
+                         /*out*/ Value* min_val,
+                         /*out*/ Value* max_val,
+                         /*out*/ bool* needs_finite_test);
 
   /** Refines the values with induction of next outer loop. Returns true on change. */
   bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const;
@@ -79,8 +79,8 @@
    */
   bool CanGenerateCode(HInstruction* context,
                        HInstruction* instruction,
-                       /*out*/bool* needs_finite_test,
-                       /*out*/bool* needs_taken_test);
+                       /*out*/ bool* needs_finite_test,
+                       /*out*/ bool* needs_taken_test);
 
   /**
    * Generates the actual code in the HIR for the lower and upper bound expressions on the
@@ -101,8 +101,8 @@
                          HInstruction* instruction,
                          HGraph* graph,
                          HBasicBlock* block,
-                         /*out*/HInstruction** lower,
-                         /*out*/HInstruction** upper);
+                         /*out*/ HInstruction** lower,
+                         /*out*/ HInstruction** upper);
 
   /**
    * Generates explicit taken-test for the loop in the given context. Code is generated in
@@ -113,7 +113,7 @@
   void GenerateTakenTest(HInstruction* context,
                          HGraph* graph,
                          HBasicBlock* block,
-                         /*out*/HInstruction** taken_test);
+                         /*out*/ HInstruction** taken_test);
 
  private:
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
@@ -168,17 +168,17 @@
                     HInstruction* instruction,
                     HGraph* graph,
                     HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper,
-                    /*out*/HInstruction** taken_test,
-                    /*out*/bool* needs_finite_test,
-                    /*out*/bool* needs_taken_test) const;
+                    /*out*/ HInstruction** lower,
+                    /*out*/ HInstruction** upper,
+                    /*out*/ HInstruction** taken_test,
+                    /*out*/ bool* needs_finite_test,
+                    /*out*/ bool* needs_taken_test) const;
 
   bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                     HInductionVarAnalysis::InductionInfo* trip,
                     HGraph* graph,
                     HBasicBlock* block,
-                    /*out*/HInstruction** result,
+                    /*out*/ HInstruction** result,
                     bool in_body,
                     bool is_min) const;
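
GetInductionRange() now reports through its return value whether anything is known about the range, instead of filling the out-parameters with default Values. A hypothetical caller pattern (names below are placeholders, not ART code):

    #include <iostream>

    struct Value { int v; };

    bool GetRange(bool analyzable, /*out*/ Value* min_val, /*out*/ Value* max_val,
                  /*out*/ bool* needs_finite_test) {
      if (analyzable) {
        *min_val = Value{1};
        *max_val = Value{10};
        *needs_finite_test = false;
        return true;
      }
      return false;  // Nothing known; out-parameters are left untouched.
    }

    int main() {
      Value lo{}, hi{};
      bool finite_test = false;
      if (!GetRange(false, &lo, &hi, &finite_test)) {
        std::cout << "no induction range available, skip the optimization\n";
      }
      return 0;
    }
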
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a839d2d..a8841d3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -392,8 +392,8 @@
       << invoke_instruction->DebugName();
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
-  if (graph_->GetInstructionSet() == kMips || graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips and mips64.
+  if (graph_->GetInstructionSet() == kMips64) {
+    // TODO: Support HClassTableGet for mips64.
     return false;
   }
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
@@ -758,6 +758,7 @@
       compiler_driver_->GetInstructionSet(),
       invoke_type,
       graph_->IsDebuggable(),
+      /* osr */ false,
       graph_->GetCurrentInstructionId());
   callee_graph->SetArtMethod(resolved_method);
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 7d3a723..0029cc3 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -46,6 +46,10 @@
   bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
 
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
+  // `op` should be either HOr or HAnd.
+  // De Morgan's laws:
+  // ~a & ~b = ~(a | b)  and  ~a | ~b = ~(a & b)
+  bool TryDeMorganNegationFactoring(HBinaryOperation* op);
   void VisitShift(HBinaryOperation* shift);
 
   void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -87,6 +91,7 @@
   void SimplifyRotate(HInvoke* invoke, bool is_left);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
+  void SimplifyCompare(HInvoke* invoke, bool is_signum);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -164,6 +169,59 @@
   return true;
 }
 
+bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr()) << op->DebugName();
+  Primitive::Type type = op->GetType();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // We can apply De Morgan's laws if both inputs are Not's and are only used
+  // by `op`.
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    AND dst, nota, notb (respectively OR)
+    // with
+    //    OR or, a, b         (respectively AND)
+    //    NOT dst, or
+    HInstruction* src_left = left->InputAt(0);
+    HInstruction* src_right = right->InputAt(0);
+    uint32_t dex_pc = op->GetDexPc();
+
+    // Remove the negations on the inputs.
+    left->ReplaceWith(src_left);
+    right->ReplaceWith(src_right);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+
+    // Replace the `HAnd` or `HOr`.
+    HBinaryOperation* hbin;
+    if (op->IsAnd()) {
+      hbin = new (GetGraph()->GetArena()) HOr(type, src_left, src_right, dex_pc);
+    } else {
+      hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc);
+    }
+    HInstruction* hnot;
+    if (left->IsBooleanNot()) {
+      hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc);
+    } else {
+      hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+    }
+
+    op->GetBlock()->InsertInstructionBefore(hbin, op);
+    op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot);
+
+    RecordSimplification();
+    return true;
+  }
+
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HConstant* input_cst = instruction->GetConstantRight();
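
TryDeMorganNegationFactoring rewrites `~a op ~b` into a single negation of the dual operation, saving one Not/BooleanNot when both negations have no other users. A quick standalone check of the identities it relies on (illustration only, not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t a = -4; a <= 4; ++a) {
        for (int32_t b = -4; b <= 4; ++b) {
          assert((~a & ~b) == ~(a | b));   // Bitwise HNot case.
          assert((~a | ~b) == ~(a & b));
          bool ba = (a != 0), bb = (b != 0);
          assert((!ba && !bb) == !(ba || bb));  // HBooleanNot case.
          assert((!ba || !bb) == !(ba && bb));
        }
      }
      return 0;
    }
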
@@ -813,7 +871,10 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
   }
+
+  TryDeMorganNegationFactoring(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
@@ -1127,6 +1188,8 @@
     return;
   }
 
+  if (TryDeMorganNegationFactoring(instruction)) return;
+
   TryReplaceWithRotate(instruction);
 }
 
@@ -1249,6 +1312,26 @@
     return;
   }
 
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if (((left->IsNot() && right->IsNot()) ||
+       (left->IsBooleanNot() && right->IsBooleanNot())) &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    XOR dst, nota, notb
+    // with
+    //    XOR dst, a, b
+    instruction->ReplaceInput(left->InputAt(0), 0);
+    instruction->ReplaceInput(right->InputAt(0), 1);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+    RecordSimplification();
+    return;
+  }
+
   TryReplaceWithRotate(instruction);
 }
 
@@ -1364,6 +1447,24 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_signum) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  uint32_t dex_pc = invoke->GetDexPc();
+  HInstruction* left = invoke->InputAt(0);
+  HInstruction* right;
+  Primitive::Type type = left->GetType();
+  if (!is_signum) {
+    right = invoke->InputAt(1);
+  } else if (type == Primitive::kPrimLong) {
+    right = GetGraph()->GetLongConstant(0);
+  } else {
+    right = GetGraph()->GetIntConstant(0);
+  }
+  HCompare* compare = new (GetGraph()->GetArena())
+      HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
   if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
     SimplifyStringEquals(instruction);
@@ -1375,6 +1476,12 @@
   } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
              instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
     SimplifyRotate(instruction, true);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerCompare ||
+             instruction->GetIntrinsic() == Intrinsics::kLongCompare) {
+    SimplifyCompare(instruction, /* is_signum */ false);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum ||
+             instruction->GetIntrinsic() == Intrinsics::kLongSignum) {
+    SimplifyCompare(instruction, /* is_signum */ true);
   }
 }
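
SimplifyCompare lowers the Integer/Long compare and signum intrinsics to a single HCompare node; for signum the right-hand input is just a zero constant, since signum(x) == compare(x, 0). A standalone sketch of that equivalence (illustration only, not ART code):

    #include <cassert>
    #include <cstdint>

    int Compare(int64_t x, int64_t y) { return (x > y) - (x < y); }  // -1, 0 or +1.
    int Signum(int64_t x) { return Compare(x, 0); }

    int main() {
      assert(Signum(-42) == -1 && Signum(0) == 0 && Signum(42) == 1);
      assert(Compare(5, 7) == -1 && Compare(7, 7) == 0 && Compare(9, 7) == 1);
      return 0;
    }
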
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8912b3..96a3c3c 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1633,21 +1633,21 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
-// Rotate operations are handled as HRor instructions.
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1376695..4140d94 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -284,36 +284,6 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenCompare(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
-  Location op1 = locations->InAt(0);
-  Location op2 = locations->InAt(1);
-  Location out = locations->Out();
-
-  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
-  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
-  Register out_reg = WRegisterFrom(out);
-
-  __ Cmp(op1_reg, op2_reg);
-  __ Cset(out_reg, gt);           // out == +1 if GT or 0 otherwise
-  __ Cinv(out_reg, out_reg, lt);  // out == -1 if LT or unchanged otherwise
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerCompare(HInvoke* invoke) {
-  CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerCompare(HInvoke* invoke) {
-  GenCompare(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongCompare(HInvoke* invoke) {
-  CreateIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongCompare(HInvoke* invoke) {
-  GenCompare(invoke->GetLocations(), /* is_long */ true,  GetVIXLAssembler());
-}
-
 static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     vixl::MacroAssembler* masm) {
@@ -780,7 +750,6 @@
   Register offset = XRegisterFrom(offset_loc);  // Long offset.
   Location trg_loc = locations->Out();
   Register trg = RegisterFrom(trg_loc, type);
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
@@ -788,19 +757,11 @@
     Register temp = temps.AcquireW();
     codegen->GenerateArrayLoadWithBakerReadBarrier(
         invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
-    if (is_volatile && !use_acquire_release) {
-      __ Dmb(InnerShareable, BarrierReads);
-    }
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
     if (is_volatile) {
-      if (use_acquire_release) {
-        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
-      } else {
-        codegen->Load(type, trg, mem_op);
-        __ Dmb(InnerShareable, BarrierReads);
-      }
+      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
     } else {
       codegen->Load(type, trg, mem_op);
     }
@@ -914,8 +875,6 @@
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
   Register source = value;
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
   MemOperand mem_op(base.X(), offset);
 
   {
@@ -932,15 +891,7 @@
     }
 
     if (is_volatile || is_ordered) {
-      if (use_acquire_release) {
-        codegen->StoreRelease(type, source, mem_op);
-      } else {
-        __ Dmb(InnerShareable, BarrierAll);
-        codegen->Store(type, source, mem_op);
-        if (is_volatile) {
-          __ Dmb(InnerShareable, BarrierReads);
-        }
-      }
+      codegen->StoreRelease(type, source, mem_op);
     } else {
       codegen->Store(type, source, mem_op);
     }
@@ -1037,7 +988,6 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
@@ -1078,43 +1028,20 @@
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  if (use_acquire_release) {
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-  } else {
-    // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
-    // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
-    // one, as the latter allows a preceding load to be delayed past
-    // the STXR instruction below.
-    __ Dmb(InnerShareable, BarrierAll);
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-    __ Dmb(InnerShareable, BarrierAll);
-  }
+  __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+  __ Cmp(tmp_value, expected);
+  __ B(&exit_loop, ne);
+  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+  __ Cbnz(tmp_32, &loop_head);
   __ Bind(&exit_loop);
   __ Cset(out, eq);
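
With PreferAcquireRelease() gone, the CAS loop above always uses the ldaxr/stlxr (load-acquire / store-release exclusive) pair instead of plain exclusives bracketed by DMB barriers. Functionally this is a compare-and-swap with acquire/release ordering, retried until the exclusive store succeeds; roughly (illustration only, std::atomic used just to sketch the semantics):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    bool Cas(std::atomic<int32_t>& obj, int32_t expected, int32_t desired) {
      int32_t e = expected;
      return obj.compare_exchange_strong(e, desired, std::memory_order_acq_rel,
                                         std::memory_order_acquire);
    }

    int main() {
      std::atomic<int32_t> v{10};
      assert(Cas(v, 10, 20));    // Succeeds: out == 1.
      assert(!Cas(v, 10, 30));   // Fails: the value is 20 now, out == 0.
      assert(v.load() == 20);
      return 0;
    }
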
 
@@ -1499,32 +1426,179 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-static void GenSignum(LocationSummary* locations, bool is_long, vixl::MacroAssembler* masm) {
-  Location op1 = locations->InAt(0);
-  Location out = locations->Out();
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
-  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
-  Register out_reg = WRegisterFrom(out);
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
 
-  __ Cmp(op1_reg, 0);
-  __ Cset(out_reg, gt);           // out == +1 if GT or 0 otherwise
-  __ Cinv(out_reg, out_reg, lt);  // out == -1 if LT or unchanged otherwise
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
 }
 
-void IntrinsicLocationsBuilderARM64::VisitIntegerSignum(HInvoke* invoke) {
-  CreateIntToIntLocations(arena_, invoke);
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
 }
 
-void IntrinsicCodeGeneratorARM64::VisitIntegerSignum(HInvoke* invoke) {
-  GenSignum(invoke->GetLocations(), /* is_long */ false, GetVIXLAssembler());
+static void GenFPToFPCall(HInvoke* invoke,
+                          vixl::MacroAssembler* masm,
+                          CodeGeneratorARM64* codegen,
+                          QuickEntrypointEnum entry) {
+  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
+  __ Blr(lr);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void IntrinsicLocationsBuilderARM64::VisitLongSignum(HInvoke* invoke) {
-  CreateIntToIntLocations(arena_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
 }
 
-void IntrinsicCodeGeneratorARM64::VisitLongSignum(HInvoke* invoke) {
-  GenSignum(invoke->GetLocations(), /* is_long */ true,  GetVIXLAssembler());
+void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter);
 }
 
 // Unimplemented intrinsics.
@@ -1542,24 +1616,6 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
-UNIMPLEMENTED_INTRINSIC(MathCos)
-UNIMPLEMENTED_INTRINSIC(MathSin)
-UNIMPLEMENTED_INTRINSIC(MathAcos)
-UNIMPLEMENTED_INTRINSIC(MathAsin)
-UNIMPLEMENTED_INTRINSIC(MathAtan)
-UNIMPLEMENTED_INTRINSIC(MathAtan2)
-UNIMPLEMENTED_INTRINSIC(MathCbrt)
-UNIMPLEMENTED_INTRINSIC(MathCosh)
-UNIMPLEMENTED_INTRINSIC(MathExp)
-UNIMPLEMENTED_INTRINSIC(MathExpm1)
-UNIMPLEMENTED_INTRINSIC(MathHypot)
-UNIMPLEMENTED_INTRINSIC(MathLog)
-UNIMPLEMENTED_INTRINSIC(MathLog10)
-UNIMPLEMENTED_INTRINSIC(MathNextAfter)
-UNIMPLEMENTED_INTRINSIC(MathSinh)
-UNIMPLEMENTED_INTRINSIC(MathTan)
-UNIMPLEMENTED_INTRINSIC(MathTanh)
-
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
@@ -1570,11 +1626,15 @@
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 0d9cf09..2294713 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1019,12 +1019,14 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cba84fa..ac28503 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1429,8 +1429,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize,
-                                            pStringCompareTo).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
   __ Jalr(TMP);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
@@ -1583,7 +1582,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
   __ Jalr(TMP);
   __ Nop();
 
@@ -1659,7 +1658,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromBytes).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1685,7 +1685,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromChars).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1716,7 +1717,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromString).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1765,12 +1767,14 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
+
+// Handled as HIR instructions.
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerSignum)
 UNIMPLEMENTED_INTRINSIC(LongSignum)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index acc40bc..ab4f6f9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -37,10 +37,12 @@
 
 static constexpr int kDoubleNaNHigh = 0x7FF80000;
 static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int kFloatNaN = 0x7FC00000;
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
 
 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
-  : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+  : arena_(codegen->GetGraph()->GetArena()),
+    codegen_(codegen) {
 }
 
 
@@ -256,15 +258,37 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
-  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
-  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
   locations->SetOut(Location::SameAsFirstInput());
+  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(static_or_direct != nullptr);
+  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+    // We need addressability for the constant area.
+    locations->SetInAt(1, Location::RequiresRegister());
+    // We need a temporary to hold the constant.
+    locations->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+                      bool is64bit,
+                      X86Assembler* assembler,
+                      CodeGeneratorX86* codegen) {
   Location output = locations->Out();
 
-  if (output.IsFpuRegister()) {
+  DCHECK(output.IsFpuRegister());
+  if (locations->InAt(1).IsValid()) {
+    DCHECK(locations->InAt(1).IsRegister());
+    // We also have a constant area pointer.
+    Register constant_area = locations->InAt(1).AsRegister<Register>();
+    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+    if (is64bit) {
+      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
+      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
+    } else {
+      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
+      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
+    }
+  } else {
     // Create the right constant on an aligned stack.
     if (is64bit) {
       __ subl(ESP, Immediate(8));
@@ -277,19 +301,6 @@
       __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
     }
     __ addl(ESP, Immediate(16));
-  } else {
-    // TODO: update when assember support is available.
-    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
-//  Once assembler support is available, in-memory operations look like this:
-//    if (is64bit) {
-//      DCHECK(output.IsDoubleStackSlot());
-//      __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
-//              Immediate(0x7FFFFFFF));
-//    } else {
-//      DCHECK(output.IsStackSlot());
-//      // Can use and with a literal directly.
-//      __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
-//    }
   }
 }
 
@@ -298,7 +309,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -306,7 +317,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
 }
 
 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
@@ -388,8 +399,11 @@
   GenAbsLong(invoke->GetLocations(), GetAssembler());
 }
 
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
-                        X86Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        X86Assembler* assembler,
+                        CodeGeneratorX86* codegen) {
   Location op1_loc = locations->InAt(0);
   Location op2_loc = locations->InAt(1);
   Location out_loc = locations->Out();
@@ -450,15 +464,26 @@
 
   // NaN handling.
   __ Bind(&nan);
-  if (is_double) {
-    __ pushl(Immediate(kDoubleNaNHigh));
-    __ pushl(Immediate(kDoubleNaNLow));
-    __ movsd(out, Address(ESP, 0));
-    __ addl(ESP, Immediate(8));
+  // Do we have a constant area pointer?
+  if (locations->InAt(2).IsValid()) {
+    DCHECK(locations->InAt(2).IsRegister());
+    Register constant_area = locations->InAt(2).AsRegister<Register>();
+    if (is_double) {
+      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
+    } else {
+      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
+    }
   } else {
-    __ pushl(Immediate(kFloatNaN));
-    __ movss(out, Address(ESP, 0));
-    __ addl(ESP, Immediate(4));
+    if (is_double) {
+      __ pushl(Immediate(kDoubleNaNHigh));
+      __ pushl(Immediate(kDoubleNaNLow));
+      __ movsd(out, Address(ESP, 0));
+      __ addl(ESP, Immediate(8));
+    } else {
+      __ pushl(Immediate(kFloatNaN));
+      __ movss(out, Address(ESP, 0));
+      __ addl(ESP, Immediate(4));
+    }
   }
   __ jmp(&done);
 
@@ -483,6 +508,11 @@
   // The following is sub-optimal, but all we can do for now. It would be fine to also accept
   // the second input to be the output (we can simply swap inputs).
   locations->SetOut(Location::SameAsFirstInput());
+  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(static_or_direct != nullptr);
+  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -490,7 +520,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              /* is_double */ true,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -498,7 +532,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ true,
+              /* is_double */ false,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -506,7 +544,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              /* is_double */ true,
+              GetAssembler(),
+              codegen_);
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -514,7 +556,11 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(),
+              /* is_min */ false,
+              /* is_double */ false,
+              GetAssembler(),
+              codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -2245,7 +2291,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg = locations->InAt(0).AsRegister<Register>();
@@ -2276,7 +2322,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
@@ -2320,7 +2366,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenBitCount(X86Assembler* assembler,
+                        CodeGeneratorX86* codegen,
+                        HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2331,11 +2379,7 @@
     value = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2367,7 +2411,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
-  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
@@ -2375,7 +2419,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
-  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2390,7 +2434,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86Assembler* assembler,
+                            CodeGeneratorX86* codegen,
+                            HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2403,11 +2449,7 @@
     } else {
       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2474,8 +2516,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2483,8 +2524,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
@@ -2499,7 +2539,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-static void GenTrailingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86Assembler* assembler,
+                             CodeGeneratorX86* codegen,
+                             HInvoke* invoke, bool is_long) {
   LocationSummary* locations = invoke->GetLocations();
   Location src = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
@@ -2512,11 +2554,7 @@
     } else {
       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
     }
-    if (value == 0) {
-      __ xorl(out, out);
-    } else {
-      __ movl(out, Immediate(value));
-    }
+    codegen->Load32BitValue(out, value);
     return;
   }
 
@@ -2570,8 +2608,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ false);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2579,8 +2616,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenTrailingZeros(assembler, invoke, /* is_long */ true);
+  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
 // Unimplemented intrinsics.
@@ -2600,20 +2636,20 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
 UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
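Note on the intrinsics_x86.cc changes above: when an HX86ComputeBaseMethodAddress input is available, MathAbsFP and GenMinMaxFP now load their literals (the sign-clearing masks and the canonical quiet NaNs kDoubleNaN/kFloatNaN) from the method's constant area instead of pushing immediates onto the stack. The masks work because clearing the IEEE-754 sign bit is exactly abs(). A minimal standalone sketch of that identity (plain C++, not ART code; the helper names are made up for illustration):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // andpd/andps with these masks clear only the sign bit of the IEEE-754
    // encoding, which is abs() for double/float (NaN payloads are preserved).
    static double AbsViaMask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    static float AbsViaMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= UINT32_C(0x7FFFFFFF);
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    int main() {
      assert(AbsViaMask(-3.25) == 3.25);
      assert(AbsViaMask(-2.5f) == 2.5f);
      assert(AbsViaMask(-0.0) == 0.0);
      return 0;
    }
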
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 51fa514..c9a4344 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2431,58 +2431,6 @@
   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateCompareLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
-}
-
-static void GenCompare(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister src1 = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister src2 = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  NearLabel is_lt, done;
-
-  __ xorl(out, out);
-
-  if (is_long) {
-    __ cmpq(src1, src2);
-  } else {
-    __ cmpl(src1, src2);
-  }
-  __ j(kEqual, &done);
-  __ j(kLess, &is_lt);
-
-  __ movl(out, Immediate(1));
-  __ jmp(&done);
-
-  __ Bind(&is_lt);
-  __ movl(out, Immediate(-1));
-
-  __ Bind(&done);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerCompare(HInvoke* invoke) {
-  CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerCompare(HInvoke* invoke) {
-  GenCompare(GetAssembler(), invoke, /* is_long */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongCompare(HInvoke* invoke) {
-  CreateCompareLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongCompare(HInvoke* invoke) {
-  GenCompare(GetAssembler(), invoke, /* is_long */ true);
-}
-
 static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2757,74 +2705,6 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-static void CreateSignLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::Any());
-  locations->SetOut(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());  // Need a writeable register.
-}
-
-static void GenSign(X86_64Assembler* assembler,
-                    CodeGeneratorX86_64* codegen,
-                    HInvoke* invoke, bool is_long) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location src = locations->InAt(0);
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  if (invoke->InputAt(0)->IsConstant()) {
-    // Evaluate this at compile time.
-    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    codegen->Load32BitValue(out, value == 0 ? 0 : (value > 0 ? 1 : -1));
-    return;
-  }
-
-  // Copy input into temporary.
-  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  if (src.IsRegister()) {
-    if (is_long) {
-      __ movq(tmp, src.AsRegister<CpuRegister>());
-    } else {
-      __ movl(tmp, src.AsRegister<CpuRegister>());
-    }
-  } else if (is_long) {
-    DCHECK(src.IsDoubleStackSlot());
-    __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-  } else {
-    DCHECK(src.IsStackSlot());
-    __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-  }
-
-  // Do the bit twiddling: basically tmp >> 63/31 | -tmp >>> 63/31 for long/int.
-  if (is_long) {
-    __ movq(out, tmp);
-    __ sarq(out, Immediate(63));
-    __ negq(tmp);
-    __ shrq(tmp, Immediate(63));
-    __ orq(out, tmp);
-  } else {
-    __ movl(out, tmp);
-    __ sarl(out, Immediate(31));
-    __ negl(tmp);
-    __ shrl(tmp, Immediate(31));
-    __ orl(out, tmp);
-  }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerSignum(HInvoke* invoke) {
-  CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitIntegerSignum(HInvoke* invoke) {
-  GenSign(GetAssembler(), codegen_, invoke, /* is_long */ false);
-}
-void IntrinsicLocationsBuilderX86_64::VisitLongSignum(HInvoke* invoke) {
-  CreateSignLocations(arena_, invoke);
-}
-void IntrinsicCodeGeneratorX86_64::VisitLongSignum(HInvoke* invoke) {
-  GenSign(GetAssembler(), codegen_, invoke, /* is_long */ true);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2840,11 +2720,15 @@
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
 UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
 
-// Rotate operations are handled as HRor instructions.
+// Handled as HIR instructions.
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerCompare)
+UNIMPLEMENTED_INTRINSIC(LongCompare)
+UNIMPLEMENTED_INTRINSIC(IntegerSignum)
+UNIMPLEMENTED_INTRINSIC(LongSignum)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
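Note on the intrinsics_x86_64.cc deletions above: Integer/Long Compare and Signum do not lose support; those calls are now recognized earlier and lowered as ordinary HIR (hence the new "Handled as HIR instructions" entries), so the hand-written intrinsic generators become dead. For reference, the removed GenSign emitted a branch-free signum built from two shifts. A standalone sketch of why that identity holds (plain C++, assuming an arithmetic right shift for the signed operand, which is what the emitted sar instruction provides):

    #include <cassert>
    #include <cstdint>

    static int64_t Signum64(int64_t x) {
      // x >> 63 is -1 for negative x and 0 otherwise (arithmetic shift).
      // The negation is performed on the unsigned value so x == INT64_MIN
      // stays well defined; its top bit is 1 for any positive x.
      int64_t sign = x >> 63;
      int64_t positive =
          static_cast<int64_t>((UINT64_C(0) - static_cast<uint64_t>(x)) >> 63);
      return sign | positive;
    }

    int main() {
      assert(Signum64(0) == 0);
      assert(Signum64(42) == 1);
      assert(Signum64(-42) == -1);
      assert(Signum64(INT64_MIN) == -1);
      return 0;
    }
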
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index c4492c8..9a97f54 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -55,13 +55,13 @@
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsPhi() || use->IsInvoke() ||
+      if (use->IsPhi() || use->IsSelect() || use->IsInvoke() ||
           (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
           (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
-        // reference_ is merged to a phi, passed to a callee, or stored to heap.
+        // reference_ is merged to a phi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
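
Note on the load_store_elimination.cc change: an HSelect, like a phi, can give the tracked reference another name, so an allocation that flows into a select must no longer be treated as a non-aliased singleton. A rough source-level shape of the pattern the new use->IsSelect() check guards against (illustrative C++ only, not compiler code):

    #include <cstdio>

    struct Point { int x = 0; };

    // Once the reference flows into a select-like merge (q), a store through q
    // may alias *p, so a load of p->x cannot be folded to the stored-value 0.
    static int Demo(bool cond, Point* other) {
      Point* p = new Point();       // candidate singleton allocation
      Point* q = cond ? p : other;  // select merges p with another value
      q->x = 1;                     // possibly a store through p
      int result = p->x;            // must not be constant-folded
      delete p;
      return result;
    }

    int main() {
      Point other;
      std::printf("%d %d\n", Demo(true, &other), Demo(false, &other));  // 1 0
      return 0;
    }
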
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index c057eca..f269885 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@
       header_->GetGraph()->SetHasIrreducibleLoops(true);
       PopulateIrreducibleRecursive(back_edge);
     } else {
+      if (header_->GetGraph()->IsCompilingOsr()) {
+        irreducible_ = true;
+        header_->GetGraph()->SetHasIrreducibleLoops(true);
+      }
       PopulateRecursive(back_edge);
     }
   }
@@ -858,7 +862,6 @@
       // At the end of the loop pre-header, the corresponding value for instruction
       // is the first input of the phi.
       HInstruction* initial = instruction->AsPhi()->InputAt(0);
-      DCHECK(initial->GetBlock()->Dominates(loop_header));
       SetRawEnvAt(i, initial);
       initial->AddEnvUseAt(this, i);
     } else {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b808347..daec096 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@
          InstructionSet instruction_set,
          InvokeType invoke_type = kInvalidInvokeType,
          bool debuggable = false,
+         bool osr = false,
          int start_instruction_id = 0)
       : arena_(arena),
         blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
-        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+        osr_(osr) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -478,6 +480,8 @@
     return instruction_set_;
   }
 
+  bool IsCompilingOsr() const { return osr_; }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
@@ -606,6 +610,11 @@
   // collection pointer to passes which may create NullConstant.
   ReferenceTypeInfo inexact_object_rti_;
 
+  // Whether we are compiling this graph for on stack replacement: this will
+  // make all loops be treated as irreducible and emit special stack maps to
+  // mark compiled code entries which the interpreter can directly jump to.
+  const bool osr_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1259,6 +1268,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                            \
   M(X86ComputeBaseMethodAddress, Instruction)                           \
   M(X86LoadFromConstantTable, Instruction)                              \
+  M(X86FPNeg, Instruction)                                              \
   M(X86PackedSwitch, Instruction)
 #endif
 
@@ -6040,6 +6050,74 @@
   FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
+class SwitchTable : public ValueObject {
+ public:
+  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    if (sparse) {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+    } else {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    }
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This can't be const as it needs to be computed off of the given instruction, and complicated
+  // expressions in the initializer list seemed very ugly.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
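
Note on the SwitchTable helper added to nodes.h: it is a read-only view over the dex switch payload that a 31t instruction points at. Both payload forms start with a signature halfword and an entry count; a packed payload then stores one starting key followed by the branch targets, while a sparse payload stores all keys followed by all targets, which is what GetFirstValueIndex() accounts for. A small standalone sketch of indexing the two layouts (plain C++; the arrays stand in for the 32-bit values_ block):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Packed payload: signature, num_entries, then first_key and num_entries
    // targets (32-bit each). Sparse payload: signature, num_entries, then
    // num_entries keys followed by num_entries targets (32-bit each).
    static size_t FirstValueIndex(bool sparse, uint16_t num_entries) {
      return sparse ? num_entries : 1u;  // skip the key block / the first_key
    }

    int main() {
      // Packed switch over keys 10..12: values_[] = { first_key, t0, t1, t2 }.
      const int32_t packed_values[] = { 10, 0x20, 0x28, 0x30 };
      size_t first = FirstValueIndex(/* sparse */ false, /* num_entries */ 3);
      assert(packed_values[first + 1] == 0x28);  // target for key 11

      // Sparse switch over keys {-5, 7}: values_[] = { -5, 7, t0, t1 }.
      const int32_t sparse_values[] = { -5, 7, 0x40, 0x48 };
      first = FirstValueIndex(/* sparse */ true, /* num_entries */ 2);
      assert(sparse_values[first + 1] == 0x48);  // target for key 7
      return 0;
    }
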
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index b1bf939..0b3a84d 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -56,6 +56,25 @@
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };
 
+// Version of HNeg with access to the constant table for FP types.
+class HX86FPNeg : public HExpression<2> {
+ public:
+  HX86FPNeg(Primitive::Type result_type,
+            HInstruction* input,
+            HX86ComputeBaseMethodAddress* method_base,
+            uint32_t dex_pc)
+      : HExpression(result_type, SideEffects::None(), dex_pc) {
+    DCHECK(Primitive::IsFloatingPointType(result_type));
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  DECLARE_INSTRUCTION(X86FPNeg);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HX86FPNeg);
+};
+
 // X86 version of HPackedSwitch that holds a pointer to the base method address.
 class HX86PackedSwitch : public HTemplateInstruction<2> {
  public:
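
Note on the new HX86FPNeg node: SSE has no fneg and cannot encode a floating-point immediate, so negation is conventionally lowered as an XOR with a sign-bit mask held in memory, which is why the node carries the HX86ComputeBaseMethodAddress input for the constant area (the actual code generation is not part of this diff, so the xorps/xorpd lowering is inferred, not shown here). A standalone sketch of the sign-flip identity:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // XOR with the sign-bit mask flips only the IEEE-754 sign, i.e. negates
    // the value; the mask has to live in memory, hence the constant area.
    static float NegViaMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits ^= UINT32_C(0x80000000);
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    int main() {
      assert(NegViaMask(2.5f) == -2.5f);
      assert(NegViaMask(-1.0f) == 1.0f);
      return 0;
    }
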
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index de85729..fc66823 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -198,8 +198,9 @@
 };
 static constexpr uint8_t expected_cfi_kMips64[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1,
-    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44,
+    0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
+    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: daddiu r29, r29, -40
 // 0x00000004: .cfi_def_cfa_offset: 40
@@ -210,7 +211,9 @@
 // 0x0000000c: sd r16, +16(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
 // 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
 // 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
 // 0x0000001c: sd r4, +0(r29)
@@ -218,7 +221,9 @@
 // 0x00000020: daddiu r29, r29, 24
 // 0x00000024: .cfi_def_cfa_offset: 40
 // 0x00000024: ldc1 f24, +0(r29)
+// 0x00000028: .cfi_restore: r56
 // 0x00000028: ldc1 f25, +8(r29)
+// 0x0000002c: .cfi_restore: r57
 // 0x0000002c: ld r16, +16(r29)
 // 0x00000030: .cfi_restore: r16
 // 0x00000030: ld r17, +24(r29)
@@ -427,9 +432,9 @@
 };
 static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28,
-    0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
-    0x40,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00,
+    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
+    0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: daddiu r29, r29, -40
 // 0x00000004: .cfi_def_cfa_offset: 40
@@ -440,7 +445,9 @@
 // 0x0000000c: sd r16, +16(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
 // 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
 // 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
 // 0x0000001c: sd r4, +0(r29)
@@ -454,7 +461,9 @@
 // 0x00020030: daddiu r29, r29, 24
 // 0x00020034: .cfi_def_cfa_offset: 40
 // 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: .cfi_restore: r56
 // 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: .cfi_restore: r57
 // 0x0002003c: ld r16, +16(r29)
 // 0x00020040: .cfi_restore: r16
 // 0x00020040: ld r17, +24(r29)
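
Note on the expected-CFI update: the extra bytes record the two FP callee-saves (f24/f25, DWARF registers 56/57) that the MIPS64 frame now saves and restores, matching the added .cfi_offset/.cfi_restore comments. A minimal decoder for only the opcode forms these arrays use, to make the byte diff readable (plain C++; the data alignment factor of -4 is inferred from the annotated offsets, and every ULEB128 operand in these arrays fits in a single byte):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static void Decode(const uint8_t* p, size_t n) {
      for (size_t i = 0; i < n;) {
        int op = p[i++];
        if ((op & 0xC0) == 0x40) {
          std::printf("advance_loc %d\n", op & 0x3F);
        } else if ((op & 0xC0) == 0x80) {
          std::printf("offset: r%d at cfa-%d\n", op & 0x3F, p[i++] * 4);
        } else if ((op & 0xC0) == 0xC0) {
          std::printf("restore: r%d\n", op & 0x3F);
        } else if (op == 0x0E) {
          std::printf("def_cfa_offset: %d\n", p[i++]);
        } else if (op == 0x0A) {
          std::printf("remember_state\n");
        } else if (op == 0x0B) {
          std::printf("restore_state\n");
        } else if (op == 0x04) {  // advance_loc4 with a 4-byte operand
          std::printf("advance_loc4\n");
          i += 4;
        } else {
          std::printf("unhandled opcode 0x%02X\n", op);
          return;
        }
      }
    }

    int main() {
      // The new prologue bytes: f25 (r57) at cfa-32, then f24 (r56) at cfa-40.
      const uint8_t kSaves[] = { 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A };
      Decode(kSaves, sizeof(kSaves));
      return 0;
    }
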
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bdc664b..736ac32 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -300,7 +300,7 @@
     }
   }
 
-  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -309,7 +309,8 @@
   CompiledMethod* Emit(ArenaAllocator* arena,
                        CodeVectorAllocator* code_allocator,
                        CodeGenerator* codegen,
-                       CompilerDriver* driver) const;
+                       CompilerDriver* driver,
+                       const DexFile::CodeItem* item) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
@@ -327,7 +328,8 @@
                             uint32_t method_idx,
                             jobject class_loader,
                             const DexFile& dex_file,
-                            Handle<mirror::DexCache> dex_cache) const;
+                            Handle<mirror::DexCache> dex_cache,
+                            bool osr) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -580,11 +582,12 @@
 CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
                                          CodeVectorAllocator* code_allocator,
                                          CodeGenerator* codegen,
-                                         CompilerDriver* compiler_driver) const {
+                                         CompilerDriver* compiler_driver,
+                                         const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
-  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -615,7 +618,8 @@
                                               uint32_t method_idx,
                                               jobject class_loader,
                                               const DexFile& dex_file,
-                                              Handle<mirror::DexCache> dex_cache) const {
+                                              Handle<mirror::DexCache> dex_cache,
+                                              bool osr) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -663,8 +667,14 @@
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
   HGraph* graph = new (arena) HGraph(
-      arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
-      kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+      arena,
+      dex_file,
+      method_idx,
+      requires_barrier,
+      compiler_driver->GetInstructionSet(),
+      kInvalidInvokeType,
+      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      osr);
 
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
@@ -797,10 +807,11 @@
                    method_idx,
                    jclass_loader,
                    dex_file,
-                   dex_cache));
+                   dex_cache,
+                   /* osr */ false));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -843,7 +854,8 @@
 
 bool OptimizingCompiler::JitCompile(Thread* self,
                                     jit::JitCodeCache* code_cache,
-                                    ArtMethod* method) {
+                                    ArtMethod* method,
+                                    bool osr) {
   StackHandleScope<2> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
@@ -873,7 +885,8 @@
                    method_idx,
                    jclass_loader,
                    *dex_file,
-                   dex_cache));
+                   dex_cache,
+                   osr));
     if (codegen.get() == nullptr) {
       return false;
     }
@@ -885,7 +898,7 @@
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
-  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
   const void* code = code_cache->CommitCode(
       self,
       method,
@@ -896,7 +909,8 @@
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
-      code_allocator.GetSize());
+      code_allocator.GetSize(),
+      osr);
 
   if (code == nullptr) {
     code_cache->ClearData(self, stack_map_data);
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a2180bc..a6f1461 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -53,6 +53,10 @@
     BinaryFP(div);
   }
 
+  void VisitCompare(HCompare* compare) OVERRIDE {
+    BinaryFP(compare);
+  }
+
   void VisitReturn(HReturn* ret) OVERRIDE {
     HConstant* value = ret->InputAt(0)->AsConstant();
     if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
@@ -74,11 +78,50 @@
 
   void BinaryFP(HBinaryOperation* bin) {
     HConstant* rhs = bin->InputAt(1)->AsConstant();
-    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
+    if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) {
       ReplaceInput(bin, rhs, 1, false);
     }
   }
 
+  void VisitEqual(HEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitNotEqual(HNotEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitLessThan(HLessThan* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitGreaterThan(HGreaterThan* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE {
+    BinaryFP(cond);
+  }
+
+  void VisitNeg(HNeg* neg) OVERRIDE {
+    if (Primitive::IsFloatingPointType(neg->GetType())) {
+      // We need to replace the HNeg with an HX86FPNeg in order to address the constant area.
+      InitializePCRelativeBasePointer();
+      HGraph* graph = GetGraph();
+      HBasicBlock* block = neg->GetBlock();
+      HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg(
+          neg->GetType(),
+          neg->InputAt(0),
+          base_,
+          neg->GetDexPc());
+      block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg);
+    }
+  }
+
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
     if (switch_insn->GetNumEntries() <=
         InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
@@ -127,12 +170,23 @@
     // If this is an invoke-static/-direct with PC-relative dex cache array
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    // We can't add a pointer to the constant area if we already have a current
+    // method pointer. This may arise when sharpening doesn't remove the current
+    // method pointer from the invoke.
+    if (invoke_static_or_direct != nullptr &&
+        invoke_static_or_direct->HasCurrentMethodInput()) {
+      DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+      return;
+    }
+
+    bool base_added = false;
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
       InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
-      DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
       invoke_static_or_direct->AddSpecialInput(base_);
+      base_added = true;
     }
+
     // Ensure that we can load FP arguments from the constant area.
     for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
       HConstant* input = invoke->InputAt(i)->AsConstant();
@@ -140,6 +194,25 @@
         ReplaceInput(invoke, input, i, true);
       }
     }
+
+    // These intrinsics need the constant area.
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kMathAbsDouble:
+      case Intrinsics::kMathAbsFloat:
+      case Intrinsics::kMathMaxDoubleDouble:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMinFloatFloat:
+        if (!base_added) {
+          DCHECK(invoke_static_or_direct != nullptr);
+          DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
+          InitializePCRelativeBasePointer();
+          invoke_static_or_direct->AddSpecialInput(base_);
+        }
+        break;
+      default:
+        break;
+    }
   }
 
   // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7ed3c84..1dd3508 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -174,6 +174,38 @@
   ComputeLiveInAndLiveOutSets();
 }
 
+static void RecursivelyProcessInputs(HInstruction* current,
+                                     HInstruction* actual_user,
+                                     BitVector* live_in) {
+  for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+    HInstruction* input = current->InputAt(i);
+    bool has_in_location = current->GetLocations()->InAt(i).IsValid();
+    bool has_out_location = input->GetLocations()->Out().IsValid();
+
+    if (has_in_location) {
+      DCHECK(has_out_location)
+          << "Instruction " << current->DebugName() << current->GetId()
+          << " expects an input value at index " << i << " but "
+          << input->DebugName() << input->GetId() << " does not produce one.";
+      DCHECK(input->HasSsaIndex());
+      // `input` generates a result used by `current`. Add use and update
+      // the live-in set.
+      input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user);
+      live_in->SetBit(input->GetSsaIndex());
+    } else if (has_out_location) {
+      // `input` generates a result but it is not used by `current`.
+    } else {
+      // `input` is inlined into `current`. Walk over its inputs and record
+      // uses at `current`.
+      DCHECK(input->IsEmittedAtUseSite());
+      // Check that the inlined input is not a phi. Recursing on loop phis could
+      // lead to an infinite loop.
+      DCHECK(!input->IsPhi());
+      RecursivelyProcessInputs(input, actual_user, live_in);
+    }
+  }
+}
+
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
@@ -261,35 +293,7 @@
           DCHECK(!current->HasEnvironmentUses());
         }
       } else {
-        for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-          HInstruction* input = current->InputAt(i);
-          bool has_in_location = current->GetLocations()->InAt(i).IsValid();
-          bool has_out_location = input->GetLocations()->Out().IsValid();
-
-          if (has_in_location) {
-            DCHECK(has_out_location);
-            DCHECK(input->HasSsaIndex());
-            // `Input` generates a result used by `current`. Add use and update
-            // the live-in set.
-            input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
-            live_in->SetBit(input->GetSsaIndex());
-          } else if (has_out_location) {
-            // `Input` generates a result but it is not used by `current`.
-          } else {
-            // `Input` is inlined into `current`. Walk over its inputs and record
-            // uses at `current`.
-            DCHECK(input->IsEmittedAtUseSite());
-            for (size_t i2 = 0, e2 = input->InputCount(); i2 < e2; ++i2) {
-              HInstruction* inlined_input = input->InputAt(i2);
-              DCHECK(inlined_input->HasSsaIndex()) << "Recursive inlining not allowed.";
-              if (input->GetLocations()->InAt(i2).IsValid()) {
-                live_in->SetBit(inlined_input->GetSsaIndex());
-                inlined_input->GetLiveInterval()->AddUse(
-                    /* owner */ input, /* environment */ nullptr, i2, /* actual_user */ current);
-              }
-            }
-          }
-        }
+        RecursivelyProcessInputs(current, current, live_in);
       }
     }
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index f9ff2df..ab480ca 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -300,10 +300,17 @@
   EmitRtd(0x1f, rt, rd, 0x5, 0x24);
 }
 
-void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) {
-  DCHECK(0 <= pos && pos < 32) << pos;
-  DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one;
-  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3);
+void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size - 1), pos, 0x3);
+}
+
+void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos - 32)) << pos;
+  CHECK(IsUint<5>(size - 1)) << size;
+  CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size;
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6);
 }
 
 void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) {
@@ -311,22 +318,22 @@
 }
 
 void Mips64Assembler::Sc(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x26);
 }
 
 void Mips64Assembler::Scd(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x27);
 }
 
 void Mips64Assembler::Ll(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x36);
 }
 
 void Mips64Assembler::Lld(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x37);
 }
 
@@ -967,10 +974,18 @@
   EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mfhc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x03, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mthc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x07, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
@@ -1787,11 +1802,13 @@
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1808,32 +1825,51 @@
       Lhu(reg, base, offset);
       break;
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lw(reg, base, offset);
       break;
     case kLoadUnsignedWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwu(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ld(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        Lwu(TMP2, base, offset + kMips64WordSize);
+        Dinsu(reg, TMP2, 32, 32);
+      } else {
+        Ld(reg, base, offset);
+      }
       break;
   }
 }
 
 void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
                                         int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwc1(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ldc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        Lw(TMP2, base, offset + kMips64WordSize);
+        Mthc1(TMP2, reg);
+      } else {
+        Ldc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1869,11 +1905,13 @@
 
 void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1884,10 +1922,18 @@
       Sh(reg, base, offset);
       break;
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Sw(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sd(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        Dsrl32(TMP2, reg, 0);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sd(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1896,19 +1942,29 @@
 
 void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Swc1(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sdc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Mfhc1(TMP2, reg);
+        Swc1(reg, base, offset);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sdc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -2053,7 +2109,7 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2062,7 +2118,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2080,7 +2136,7 @@
 }
 
 void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
-                                       ThreadOffset<kMipsDoublewordSize> src,
+                                       ThreadOffset<kMips64DoublewordSize> src,
                                        size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
@@ -2102,7 +2158,7 @@
     // Negate the 32-bit ref
     Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
     // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31);
+    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
   }
 }
 
@@ -2115,7 +2171,7 @@
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<kMipsDoublewordSize> offs) {
+                                             ThreadOffset<kMips64DoublewordSize> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2160,7 +2216,7 @@
 }
 
 void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<kMipsDoublewordSize> thr_offs,
+                                             ThreadOffset<kMips64DoublewordSize> thr_offs,
                                              ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
@@ -2168,7 +2224,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2372,7 +2428,7 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset ATTRIBUTE_UNUSED,
                                        ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
@@ -2392,7 +2448,7 @@
   LoadFromOffset(kLoadDoubleword,
                  scratch.AsGpuRegister(),
                  S1,
-                 Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+                 Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value());
   Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
@@ -2409,7 +2465,7 @@
   LoadFromOffset(kLoadDoubleword,
                  T9,
                  S1,
-                 QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+                 QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 
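Note on the assembler_mips64.cc changes: MIPS64 ld/sd/ldc1/sdc1 are not guaranteed to handle addresses that are not naturally aligned, so kLoadDoubleword/kStoreDoubleword with a word-aligned but not doubleword-aligned offset is now split into two 32-bit accesses, recombined with dinsu (GPR loads), mthc1/mfhc1 (high-word FPU transfers) or dsrl32 (GPR stores). Large offsets are also rebased so that the part folded into AT stays 8-byte aligned and the remainder stays small and word-aligned. A standalone sketch of the two arithmetic pieces (plain C++, not assembler code):

    #include <cassert>
    #include <cstdint>

    // The unaligned kLoadDoubleword path does two lwu loads and then
    // dinsu reg, TMP2, 32, 32, i.e. result = low32 | (high32 << 32).
    static uint64_t CombineWords(uint32_t low, uint32_t high) {
      return static_cast<uint64_t>(low) | (static_cast<uint64_t>(high) << 32);
    }

    // The rebasing logic: fold the 8-byte-aligned part of the offset into the
    // base register (AT) and keep only the small remainder.
    static void SplitOffset(int32_t offset, int32_t* base_adjust, int32_t* remainder) {
      *base_adjust = offset & ~7;
      *remainder = offset & 7;
    }

    int main() {
      assert(CombineWords(0xDDCCBBAAu, 0x44332211u) == UINT64_C(0x44332211DDCCBBAA));
      int32_t adjust = 0, rem = 0;
      SplitOffset(0x12344, &adjust, &rem);  // 0x12344 = 0x12340 (8-aligned) + 4
      assert(adjust == 0x12340 && rem == 4);
      return 0;
    }
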
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 3262640..71f5e00 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -31,7 +31,8 @@
 namespace art {
 namespace mips64 {
 
-static constexpr size_t kMipsDoublewordSize = 8;
+static constexpr size_t kMips64WordSize = 4;
+static constexpr size_t kMips64DoublewordSize = 8;
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -151,7 +152,8 @@
   void Seh(GpuRegister rd, GpuRegister rt);
   void Dsbh(GpuRegister rd, GpuRegister rt);
   void Dshd(GpuRegister rd, GpuRegister rt);
-  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
+  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size);  // MIPS64
+  void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
   void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -301,7 +303,9 @@
   void Cvtdl(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mfhc1(GpuRegister rt, FpuRegister fs);
   void Mtc1(GpuRegister rt, FpuRegister fs);
+  void Mthc1(GpuRegister rt, FpuRegister fs);
   void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
@@ -378,10 +382,10 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -390,7 +394,7 @@
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
   void LoadFromThread64(ManagedRegister mdest,
-                        ThreadOffset<kMipsDoublewordSize> src,
+                        ThreadOffset<kMips64DoublewordSize> src,
                         size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
@@ -401,15 +405,15 @@
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
   void LoadRawPtrFromThread64(ManagedRegister mdest,
-                              ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
+                              ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -466,7 +470,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+  void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset,
                         ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 7d79be2..b758d64 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -543,6 +543,30 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Mfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mfhc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mthc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmfc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmfc1, "dmfc1 ${reg1}, ${reg2}"), "Dmfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmtc1) {
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
@@ -827,6 +851,44 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd");
 }
 
+TEST_F(AssemblerMIPS64Test, Dext) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 33 * 16);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; size <= 32; size++) {
+          __ Dext(*reg1, *reg2, pos, size);
+          expected << "dext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dext");
+}
+
+TEST_F(AssemblerMIPS64Test, Dinsu) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 33 * 16);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 32; pos < 64; pos++) {
+        for (int32_t size = 1; pos + size <= 64; size++) {
+          __ Dinsu(*reg1, *reg2, pos, size);
+          expected << "dinsu $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dinsu");
+}
+
 TEST_F(AssemblerMIPS64Test, Wsbh) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
 }
@@ -942,4 +1004,638 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo");
 }
 
+TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 1($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "lb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 1($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "lbu $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 2($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "lh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 2($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "lhu $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 4($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "lw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lwu $a0, 0($a0)\n"
+      "lwu $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $a0, 256($a1)\n"
+      "lwu $a0, 1000($a1)\n"
+      "lwu $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lwu $a0, -256($a1)\n"
+      "lwu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+
+      "ld $a0, 0($a0)\n"
+      "ld $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $t3, 8($a1)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ld $a0, 256($a1)\n"
+      "ld $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ld $a0, -256($a1)\n"
+      "ld $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "lwc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lw $t3, 8($a0)\n"
+      "mthc1 $t3, $f0\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadFpuFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreToOffset) {
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8001);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 2);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 1($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "sb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 2($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "sh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "sw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sd $a0, 0($a0)\n"
+      "sd $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($a1)\n"
+      "sd $a0, 256($a1)\n"
+      "sd $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "sd $a0, -256($a1)\n"
+      "sd $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "swc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+
+      "sdc1 $f0, 0($a0)\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($a0)\n"
+      "sw $t3, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreFpuToOffset");
+}
+
+#undef __
+
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 8e80961..b1f71ce 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -126,6 +126,11 @@
       continue;
     }
 
+    // The image format is dropped.
+    if (StartsWith(original_argv[i], "--image-format=")) {
+      continue;
+    }
+
     // This should leave any dex-file and oat-file options, describing what we compiled.
 
     // However, we prefer to drop this when we saw --zip-fd.
@@ -928,16 +933,25 @@
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
 
-    // Automatically force determinism for the boot image in a host
-    // build, except when read barriers are enabled, as the former
-    // switches the GC to a non-concurrent one by passing the
-    // option `-Xgc:nonconcurrent` (see below).
-    if (!kIsTargetBuild && IsBootImage() && !kEmitCompilerReadBarrier) {
-      force_determinism_ = true;
+    // Automatically force determinism for the boot image in a host build if the default GC is CMS
+    // or MS and read barriers are not enabled, as forcing determinism switches the GC to a
+    // non-concurrent one by passing the option `-Xgc:nonconcurrent` (see below).
+    if (!kIsTargetBuild && IsBootImage()) {
+      if (SupportsDeterministicCompilation()) {
+        force_determinism_ = true;
+      } else {
+        LOG(WARNING) << "Deterministic compilation is disabled.";
+      }
     }
     compiler_options_->force_determinism_ = force_determinism_;
   }
 
+  static bool SupportsDeterministicCompilation() {
+    return (gc::kCollectorTypeDefault == gc::kCollectorTypeCMS ||
+            gc::kCollectorTypeDefault == gc::kCollectorTypeMS) &&
+        !kEmitCompilerReadBarrier;
+  }
+
   void ExpandOatAndImageFilenames() {
     std::string base_oat = oat_filenames_[0];
     size_t last_oat_slash = base_oat.rfind('/');
@@ -1183,8 +1197,8 @@
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
       } else if (option == "--force-determinism") {
-        if (kEmitCompilerReadBarrier) {
-          Usage("Cannot use --force-determinism with read barriers");
+        if (!SupportsDeterministicCompilation()) {
+          Usage("Cannot use --force-determinism with read barriers or non-CMS garbage collector");
         }
         force_determinism_ = true;
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
@@ -1673,6 +1687,12 @@
         std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
         oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files);
 
+        // We need to mirror the layout of the ELF file in the compressed debug-info.
+        // Therefore we need to propagate the sizes of at least those sections.
+        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+        size_t text_size = oat_writer->GetSize() - rodata_size;
+        elf_writer->PrepareDebugInfo(rodata_size, text_size, oat_writer->GetMethodDebugInfo());
+
         OutputStream*& rodata = rodata_[i];
         DCHECK(rodata != nullptr);
         if (!oat_writer->WriteRodata(rodata)) {
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7bf6d21..288f95e 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -263,7 +263,8 @@
   arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
-  interpreter/mterp/mterp_stub.cc \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm64.S \
   arch/arm64/context_arm64.cc \
   arch/arm64/entrypoints_init_arm64.cc \
   arch/arm64/jni_entrypoints_arm64.S \
@@ -508,6 +509,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
+    LOCAL_CLANG_arm64 := true
     LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
   else # host
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..b3a2979 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -429,6 +429,56 @@
 END art_quick_invoke_stub_internal
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   r0 = stack to copy
+     *   r1 = size of stack
+     *   r2 = pc to call
+     *   r3 = JValue* result
+     *   [sp] = shorty
+     *   [sp + 4] = thread
+     */
+ENTRY art_quick_osr_stub
+    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
+    mov    r11, sp                         @ Save the stack pointer
+    mov    r10, r1                         @ Save size of stack
+    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
+    mov    r8, r2                          @ Save the pc to call
+    sub    r7, sp, #12                     @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+    and    r7, #0xFFFFFFF0                 @ Align stack pointer
+    mov    sp, r7                          @ Update stack pointer
+    str    r11, [sp, #4]                   @ Save old stack pointer
+    str    r3, [sp, #8]                    @ Save JValue result
+    mov    ip, #0
+    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
+    sub    sp, sp, r1                      @ Reserve space for callee stack
+    mov    r2, r1
+    mov    r1, r0
+    mov    r0, sp
+    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
+    bl     .Losr_entry                     @ Call the method
+    ldr    r11, [sp, #4]                   @ Restore saved stack pointer
+    ldr    r10, [sp, #8]                   @ Restore JValue result
+    mov    sp, r11                         @ Restore stack pointer.
+    ldr    r4, [sp, #36]                   @ load shorty
+    ldr    r4, [r4, #0]                    @ load return type
+    cmp    r4, #68                         @ Test if result type char == 'D'.
+    beq    .Losr_fp_result
+    cmp    r4, #70                         @ Test if result type char == 'F'.
+    beq    .Losr_fp_result
+    strd r0, [r10]                         @ Store r0/r1 into result pointer
+    b    .Losr_exit
+.Losr_fp_result:
+    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
+.Losr_exit:
+    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+.Losr_entry:
+    sub r10, r10, #4
+    str lr, [sp, r10]                     @ Store link register per the compiler ABI
+    bx r8
+END art_quick_osr_stub
+
+    /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
      */
 ARM_ENTRY art_quick_do_long_jump
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 5c8ff8f..4db9411 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -113,6 +113,25 @@
   qpoints->pShrLong = nullptr;
   qpoints->pUshrLong = nullptr;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -66,14 +66,6 @@
       return fix_cortex_a53_843419_;
   }
 
-  // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
-  // should prefer the Acquire-Release semantics over the explicit DMBs when
-  // handling load/store-volatile. For a specific use case see the ARM64
-  // Optimizing backend.
-  bool PreferAcquireRelease() const {
-    return true;
-  }
-
   virtual ~Arm64InstructionSetFeatures() {}
 
  protected:
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -30,8 +30,6 @@
   EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
   EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
   EXPECT_EQ(arm64_features->AsBitmap(), 3U);
-  // See the comments in instruction_set_features_arm64.h.
-  EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9ccabad..e848008 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -915,6 +915,105 @@
 
 
 
+/*  extern "C" void art_quick_osr_stub(void** stack,               x0
+ *                                    size_t stack_size_in_bytes,  x1
+ *                                    const uin8_t* native_pc,     x2
+ *                                    const uint8_t* native_pc,    x2
+ *                                    char   *shorty,              x4
+ *                                    Thread *self)                x5
+ */
+ENTRY art_quick_osr_stub
+SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
+    mov x9, sp                             // Save stack pointer.
+    .cfi_register sp,x9
+
+    sub x10, sp, # SAVE_SIZE
+    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
+    mov sp, x10                            // Set new SP.
+
+    str x28, [sp, #112]
+    stp x26, x27, [sp, #96]
+    stp x24, x25, [sp, #80]
+    stp x22, x23, [sp, #64]
+    stp x20, x21, [sp, #48]
+    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
+    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
+    stp xFP, xLR, [sp]                    // Store LR & FP.
+    mov xSELF, x5                         // Move thread pointer into SELF register.
+
+    sub sp, sp, #16
+    str xzr, [sp]                         // Store null for ArtMethod* slot
+    // Branch to stub.
+    bl .Losr_entry
+    add sp, sp, #16
+
+    // Restore return value address and shorty address.
+    ldp x3, x4, [sp, #16]
+    ldr x28, [sp, #112]
+    ldp x26, x27, [sp, #96]
+    ldp x24, x25, [sp, #80]
+    ldp x22, x23, [sp, #64]
+    ldp x20, x21, [sp, #48]
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x4]
+
+    // Check the return type and store the correct register into the jvalue in memory.
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Losr_exit
+
+    // Is it a double?
+    cmp w10, #'D'
+    bne .Lno_double
+    str d0, [x3]
+    b .Losr_exit
+
+.Lno_double:  // Is it a float?
+    cmp w10, #'F'
+    bne .Lno_float
+    str s0, [x3]
+    b .Losr_exit
+
+.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
+    str x0, [x3]
+
+.Losr_exit:  // Finish up.
+    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
+    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
+    mov sp, x2
+    ret
+
+.Losr_entry:
+    // Update stack pointer for the callee
+    sub sp, sp, x1
+
+    // Update link register slot expected by the callee.
+    sub w1, w1, #8
+    str lr, [sp, x1]
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X0 - source address
+    // W1 - args length
+    // SP - destination address.
+    // W10 - temporary
+.Losr_loop_entry:
+    cmp w1, #0
+    beq .Losr_loop_exit
+    sub w1, w1, #4
+    ldr w10, [x0, x1]
+    str w10, [sp, x1]
+    b .Losr_loop_entry
+
+.Losr_loop_exit:
+    // Branch to the OSR entry point.
+    br x2
+
+END art_quick_osr_stub
+
     /*
      * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
      */
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 66c8aad..d264c9b 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1366,7 +1366,106 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # a4: thread stack bottom offset
+    # v0: free list head
+    #
+    # a5, a6 : temps
+
+    ld     $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1)   # Load dex cache resolved types array.
+
+    dsll   $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT        # Scale type_idx to a byte offset.
+    daddu  $a5, $t0, $a5                                    # Compute the element address.
+    lwu    $t0, 0($a5)                                      # Load class (t0).
+    beqzc  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li     $a6, MIRROR_CLASS_STATUS_INITIALIZED
+    lwu    $a5, MIRROR_CLASS_STATUS_OFFSET($t0)             # Check class status.
+    bnec   $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor    $a5, $a5, $a5
+    daddu  $t0, $t0, $a5
+
+    lwu    $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)       # Check if access flags has
+    li     $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE           # kAccClassIsFinalizable.
+    and    $a6, $a5, $a6
+    bnezc  $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ld     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)    # Check if thread local allocation stack
+    ld     $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)    # has any room left.
+    bgeuc  $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lwu    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)        # Load object size (t1).
+    li     $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE      # Check if size is for a thread local
+                                                            # allocation.
+    bltuc  $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size: align the size up to the rosalloc bracket
+    # quantum size, divide by the quantum size and subtract one.
+    daddiu $t1, $t1, -1                                     # Decrease obj size and shift right by
+    dsrl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT    # quantum.
+
+    dsll   $t2, $t1, POINTER_SIZE_SHIFT
+    daddu  $t2, $t2, $s1
+    ld     $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)            # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+    ld     $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqzc  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+    ld     $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sd     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the first pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw     $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+    sd     $v0, 0($t3)
+    daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sd     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+    lw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu  $a5, $a5, -1
+    sw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                         # Fence.
+
+    jalr   $zero, $ra
+    .cpreturn                                    # Restore gp from t8 in branch delay slot.
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    jal    artAllocObjectFromCodeRosAlloc
+    move   $a2, $s1                              # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
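For readers following the bracket arithmetic in the fast path above, here is a minimal C++ sketch
of the same computation. It is illustrative only: the constant values and the helper name are
assumptions standing in for ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT and
ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE, not the runtime's generated asm-offset macros.

    #include <cstddef>

    constexpr size_t kBracketQuantumSizeShift = 3;        // assumed 8-byte bracket quantum
    constexpr size_t kMaxThreadLocalBracketSize = 128;    // assumed thread-local size limit

    // Mirrors the daddiu/dsrl pair above: round the object size up to the bracket
    // quantum, divide by the quantum and subtract one to get the run index.
    inline size_t BracketIndexFromSize(size_t obj_size) {
      // Valid for 1 <= obj_size <= kMaxThreadLocalBracketSize.
      return (obj_size - 1) >> kBracketQuantumSizeShift;
    }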
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 1d07d47..b027c95 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -61,6 +61,7 @@
   RA   = 31,  // Return address.
   TR   = S1,  // ART Thread Register
   TMP  = T8,  // scratch register (in addition to AT)
+  TMP2 = T3,  // scratch register (in addition to AT, reserved for assembler)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index da30331..fbee5d7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1712,5 +1712,65 @@
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
 
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = stack to copy
+     *   [sp + 8] = size of stack
+     *   [sp + 12] = pc to call
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = shorty
+     *   [sp + 24] = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save native callee saves.
+    PUSH ebp
+    PUSH ebx
+    PUSH esi
+    PUSH edi
+    mov 4+16(%esp), %esi           // ESI = argument array
+    mov 8+16(%esp), %ecx           // ECX = size of args
+    mov 12+16(%esp), %ebx          // EBX = pc to call
+    mov %esp, %ebp                 // Save stack pointer
+    andl LITERAL(0xFFFFFFF0), %esp // Align stack
+    PUSH ebp                       // Save old stack pointer
+    subl LITERAL(12), %esp         // Align stack
+    movl LITERAL(0), (%esp)        // Store null for ArtMethod* slot
+    call .Losr_entry
+
+    // Restore stack pointer.
+    addl LITERAL(12), %esp
+    POP ebp
+    mov %ebp, %esp
+
+    // Restore callee saves.
+    POP edi
+    POP esi
+    POP ebx
+    POP ebp
+    mov 16(%esp), %ecx            // Get JValue result
+    mov %eax, (%ecx)              // Store the result assuming it is a long, int or Object*
+    mov %edx, 4(%ecx)             // Store the other half of the result
+    mov 20(%esp), %edx            // Get the shorty
+    cmpb LITERAL(68), (%edx)      // Test if result type char == 'D'
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%edx)      // Test if result type char == 'F'
+    je .Losr_return_float_quick
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%ecx)           // Store the floating point result
+    ret
+.Losr_entry:
+    subl LITERAL(4), %ecx         // Given stack size contains the pushed frame pointer, subtract it.
+    subl %ecx, %esp
+    mov %esp, %edi                // EDI = beginning of stack
+    rep movsb                     // while (ecx--) { *edi++ = *esi++ }
+    jmp *%ebx
+END_FUNCTION art_quick_osr_stub
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 883da96..d6e0f1c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1744,3 +1744,62 @@
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
+
+    /*
+     * On stack replacement stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = stack to copy
+     *   rsi = size of stack
+     *   rdx = pc to call
+     *   rcx = JValue* result
+     *   r8 = shorty
+     *   r9 = thread
+     */
+DEFINE_FUNCTION art_quick_osr_stub
+    // Save the non-volatiles.
+    PUSH rbp                      // Save rbp.
+    PUSH rcx                      // Save rcx/result*.
+    PUSH r8                       // Save r8/shorty*.
+
+    // Save callee saves.
+    PUSH rbx
+    PUSH r12
+    PUSH r13
+    PUSH r14
+    PUSH r15
+
+    pushq LITERAL(0)              // Push null for ArtMethod*.
+    movl %esi, %ecx               // rcx := size of stack
+    movq %rdi, %rsi               // rsi := stack to copy
+    call .Losr_entry
+
+    // Restore stack and callee-saves.
+    addq LITERAL(8), %rsp
+    POP r15
+    POP r14
+    POP r13
+    POP r12
+    POP rbx
+    POP r8
+    POP rcx
+    POP rbp
+    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
+    je .Losr_return_double_quick
+    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
+    je .Losr_return_float_quick
+    movq %rax, (%rcx)              // Store the result assuming it is a long, int or Object*
+    ret
+.Losr_return_double_quick:
+    movsd %xmm0, (%rcx)            // Store the double floating point result.
+    ret
+.Losr_return_float_quick:
+    movss %xmm0, (%rcx)            // Store the floating point result.
+    ret
+.Losr_entry:
+    subl LITERAL(8), %ecx         // Given stack size contains the pushed frame pointer, subtract it.
+    subq %rcx, %rsp
+    movq %rsp, %rdi               // rdi := beginning of stack
+    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
+    jmp *%rdx
+END_FUNCTION art_quick_osr_stub
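All of the art_quick_osr_stub implementations above end with the same shorty-driven result
marshalling. The C++ sketch below restates that logic for clarity; JValueSketch, the parameter
names and the function itself are illustrative assumptions, not the runtime's JValue or any real
ART API.

    #include <cstdint>

    // Stand-in for the result union the stubs write into.
    union JValueSketch { int64_t j; int32_t i; float f; double d; void* l; };

    // return_type is the first character of the method shorty, as read by the stubs.
    void StoreOsrResult(char return_type, int64_t gpr, float s0, double d0, JValueSketch* result) {
      switch (return_type) {
        case 'V': break;                  // void: nothing to store
        case 'D': result->d = d0; break;  // double returned in d0/xmm0
        case 'F': result->f = s0; break;  // float returned in s0/xmm0
        default:  result->j = gpr; break; // long/int/reference returned in r0-r1/x0/rax
      }
    }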
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 74eb722..28540c8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -41,17 +41,15 @@
 
 namespace art {
 
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClassUnchecked() {
   GcRootSource gc_root_source(this);
-  return declaring_class_.Read(&gc_root_source);
+  return declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
 }
 
-inline mirror::Class* ArtMethod::GetDeclaringClassNoBarrier() {
-  return declaring_class_.Read<kWithoutReadBarrier>();
-}
-
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClass() {
-  mirror::Class* result = GetDeclaringClassUnchecked();
+  mirror::Class* result = GetDeclaringClassUnchecked<kReadBarrierOption>();
   if (kIsDebugBuild) {
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
@@ -79,24 +77,28 @@
 
 // AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
 // TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
-ALWAYS_INLINE
-static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
-        method->GetDeclaringClass()->IsErroneous());
+template <ReadBarrierOption kReadBarrierOption>
+ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
+    NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
 }
 
+template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetAccessFlags() {
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
       ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-            GetDeclaringClass()->IsErroneous());
+      CHECK(IsRuntimeMethod() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
       Locks::mutator_lock_->AssertSharedHeld(self);
-      DoGetAccessFlagsHelper(this);
+      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
   return access_flags_;
@@ -469,7 +471,7 @@
 
 template <typename Visitor>
 inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor) {
-  mirror::Class* old_class = GetDeclaringClassNoBarrier();
+  mirror::Class* old_class = GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   mirror::Class* new_class = visitor(old_class);
   if (old_class != new_class) {
     SetDeclaringClass(new_class);
@@ -486,9 +488,9 @@
   }
 }
 
-template <typename Visitor>
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor) {
-  if (IsNative()) {
+  if (IsNative<kReadBarrierOption>()) {
     const void* old_native_code = GetEntryPointFromJni();
     const void* new_native_code = visitor(old_native_code);
     if (old_native_code != new_native_code) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 6f36016..cd38e16 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -292,22 +292,7 @@
         // Unusual case where we were running generated code and an
         // exception was thrown to force the activations to be removed from the
         // stack. Continue execution in the interpreter.
-        self->ClearException();
-        ShadowFrame* shadow_frame =
-            self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
-        mirror::Throwable* pending_exception = nullptr;
-        bool from_code = false;
-        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
-        CHECK(!from_code);
-        self->SetTopOfStack(nullptr);
-        self->SetTopOfShadowStack(shadow_frame);
-
-        // Restore the exception that was pending before deoptimization then interpret the
-        // deoptimized frames.
-        if (pending_exception != nullptr) {
-          self->SetException(pending_exception);
-        }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
+        self->DeoptimizeWithDeoptimizationException(result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
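
For reference, the block removed above is presumably what the new Thread::DeoptimizeWithDeoptimizationException helper encapsulates; a sketch reconstructed from the deleted lines (the real helper lives in thread.cc and may differ):

  void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
    ClearException();
    ShadowFrame* shadow_frame =
        PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
    mirror::Throwable* pending_exception = nullptr;
    bool from_code = false;
    PopDeoptimizationContext(result, &pending_exception, &from_code);
    CHECK(!from_code);
    SetTopOfStack(nullptr);
    SetTopOfShadowStack(shadow_frame);
    // Restore the exception that was pending before deoptimization, then interpret the
    // deoptimized frames.
    if (pending_exception != nullptr) {
      SetException(pending_exception);
    }
    interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
  }
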
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 440e796..078a978 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -57,11 +57,10 @@
                                         jobject jlr_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE mirror::Class* GetDeclaringClassNoBarrier()
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -77,6 +76,7 @@
 
   // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
   // a proxy method.
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
@@ -154,8 +154,9 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
-    return (GetAccessFlags() & kAccNative) != 0;
+    return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
   }
 
   bool IsFastNative() {
@@ -173,13 +174,13 @@
 
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsPreverified() {
-    return (GetAccessFlags() & kAccPreverified) != 0;
+  bool SkipAccessChecks() {
+    return (GetAccessFlags() & kAccSkipAccessChecks) != 0;
   }
 
-  void SetPreverified() {
-    DCHECK(!IsPreverified());
-    SetAccessFlags(GetAccessFlags() | kAccPreverified);
+  void SetSkipAccessChecks() {
+    DCHECK(!SkipAccessChecks());
+    SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
   }
 
   // Returns true if this method could be overridden by a default method.
@@ -485,7 +486,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update entry points by passing them through the visitor.
-  template <typename Visitor>
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
   ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor);
 
  protected:
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index ad03c31..a53dcea 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -156,6 +156,20 @@
   }
 };
 
+// 32-bit FNV-1a hash function suitable for std::unordered_map.
+// It can be used with any container that works with a range-based for loop.
+// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+template <typename Vector>
+struct FNVHash {
+  size_t operator()(const Vector& vector) const {
+    uint32_t hash = 2166136261u;
+    for (const auto& value : vector) {
+      hash = (hash ^ value) * 16777619u;
+    }
+    return hash;
+  }
+};
+
 // Use to suppress type deduction for a function argument.
 // See std::identity<> for more background:
 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
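
A minimal usage sketch for the new FNVHash functor (hypothetical, not part of the patch): because it only needs range-based iteration over integer-like elements, it can serve directly as the hasher for vector keys in an std::unordered_map.

  #include <cstdint>
  #include <unordered_map>
  #include <vector>

  using ByteKey = std::vector<uint8_t>;
  // FNVHash<ByteKey> folds each byte into a 32-bit FNV-1a hash.
  std::unordered_map<ByteKey, int, art::FNVHash<ByteKey>> table;

  void Example() {
    table[ByteKey{0xde, 0xad, 0xbe, 0xef}] = 1;
  }
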
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5ef199c..88d49b2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1182,11 +1182,15 @@
   ClassTable* const table_;
 };
 
-void ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
     Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-    bool added_class_table) {
+    bool added_class_table,
+    bool* out_forward_dex_cache_array,
+    std::string* out_error_msg) {
+  DCHECK(out_forward_dex_cache_array != nullptr);
+  DCHECK(out_error_msg != nullptr);
   Thread* const self = Thread::Current();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const ImageHeader& header = space->GetImageHeader();
@@ -1194,8 +1198,11 @@
   // class loader fields.
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
-  // TODO: Store class table in the image to avoid manually adding the classes.
-  for (int32_t i = 0, num_dex_caches = dex_caches->GetLength(); i < num_dex_caches; i++) {
+  // Dex cache array fixup is all or nothing: we must reject app images that mix the two cases,
+  // since we rely on clobbering the dex cache arrays in the image to forward to the .bss.
+  size_t num_dex_caches_with_bss_arrays = 0;
+  const size_t num_dex_caches = dex_caches->GetLength();
+  for (size_t i = 0; i < num_dex_caches; i++) {
     mirror::DexCache* const dex_cache = dex_caches->Get(i);
     const DexFile* const dex_file = dex_cache->GetDexFile();
     // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
@@ -1209,22 +1216,22 @@
     CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
     CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
     CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
-    if (dex_file->GetOatDexFile() != nullptr &&
-        dex_file->GetOatDexFile()->GetDexCacheArrays() != nullptr) {
+    const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+    if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+      ++num_dex_caches_with_bss_arrays;
       DexCacheArraysLayout layout(image_pointer_size_, dex_file);
-      uint8_t* const raw_arrays = dex_file->GetOatDexFile()->GetDexCacheArrays();
-      // The space is not yet visible to the GC, we can avoid the read barriers and use
-      // std::copy_n.
+      uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
+      // The space is not yet visible to the GC, so we can avoid read barriers and use std::copy_n.
       if (num_strings != 0u) {
+        GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
         GcRoot<mirror::String>* const strings =
             reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
         for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
           DCHECK(strings[j].IsNull());
         }
-        std::copy_n(dex_cache->GetStrings(), num_strings, strings);
+        std::copy_n(image_resolved_strings, num_strings, strings);
         dex_cache->SetStrings(strings);
       }
-
       if (num_types != 0u) {
         GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
         GcRoot<mirror::Class>* const types =
@@ -1282,6 +1289,12 @@
           // Update the class loader from the one in the image class loader to the one that loaded
           // the app image.
           klass->SetClassLoader(class_loader.Get());
+          // The resolved type could be from another dex cache, so go through the dex cache just
+          // in case. The dex cache strings may be null for array classes.
+          if (klass->GetDexCacheStrings() != nullptr) {
+            DCHECK(!klass->IsArrayClass());
+            klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+          }
           // If there are multiple dex caches, there may be the same class multiple times
           // in different dex caches. Check for this since inserting will add duplicates
           // otherwise.
@@ -1326,7 +1339,6 @@
               CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
             }
           }
-          DCHECK_EQ(klass->GetClassLoader(), class_loader.Get());
           if (kIsDebugBuild) {
             for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
               const void* code = m.GetEntryPointFromQuickCompiledCode();
@@ -1354,20 +1366,68 @@
       }
     }
   }
-  {
+  *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
+  if (*out_forward_dex_cache_array) {
+    if (num_dex_caches_with_bss_arrays != num_dex_caches) {
+      // Reject application image since we cannot forward only some of the dex cache arrays.
+      // TODO: We could get around this by having a dedicated forwarding slot. It should be an
+      // uncommon case.
+      *out_error_msg = StringPrintf("Dex caches in bss do not match total: %zu vs %zu",
+                                    num_dex_caches_with_bss_arrays,
+                                    num_dex_caches);
+      return false;
+    }
     FixupArtMethodArrayVisitor visitor(header);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor, space->Begin(), sizeof(void*));
+        &visitor,
+        space->Begin(),
+        sizeof(void*));
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
   }
   if (kIsDebugBuild) {
     ClassTable* const class_table = class_loader.Get()->GetClassTable();
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor2, space->Begin(), sizeof(void*));
+        &visitor2,
+        space->Begin(),
+        sizeof(void*));
   }
+  return true;
 }
 
+class UpdateClassLoaderAndResolvedStringsVisitor {
+ public:
+  UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
+                                             mirror::ClassLoader* class_loader,
+                                             bool forward_strings)
+      : space_(space),
+        class_loader_(class_loader),
+        forward_strings_(forward_strings) {}
+
+  bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (forward_strings_) {
+      GcRoot<mirror::String>* strings = klass->GetDexCacheStrings();
+      if (strings != nullptr) {
+        DCHECK(
+            space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+                reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
+            << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
+        // Dex caches have already been updated, so take the strings pointer from there.
+        GcRoot<mirror::String>* new_strings = klass->GetDexCache()->GetStrings();
+        DCHECK_NE(strings, new_strings);
+        klass->SetDexCacheStrings(new_strings);
+      }
+    }
+    // Finally, update class loader.
+    klass->SetClassLoader(class_loader_);
+    return true;
+  }
+
+  gc::space::ImageSpace* const space_;
+  mirror::ClassLoader* const class_loader_;
+  const bool forward_strings_;
+};
+
 bool ClassLinker::AddImageSpace(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1576,21 +1636,55 @@
   if (app_image) {
     GetOrCreateAllocatorForClassLoader(class_loader.Get());  // Make sure we have a linear alloc.
   }
-  if (class_table_section.Size() > 0u) {
-    const uint64_t start_time2 = NanoTime();
+  ClassTable* class_table = nullptr;
+  {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
-    class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
-    if (app_image) {
-      class_table->SetClassLoader(class_loader.Get());
-    } else {
-      dex_cache_boot_image_class_lookup_required_ = false;
+    class_table = InsertClassTableForClassLoader(class_loader.Get());
+    if (class_table_section.Size() > 0u) {
+      const uint64_t start_time2 = NanoTime();
+      class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
+      if (!app_image) {
+        dex_cache_boot_image_class_lookup_required_ = false;
+      }
+      VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
+      added_class_table = true;
     }
-    VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
-    added_class_table = true;
   }
   if (app_image) {
-    UpdateAppImageClassLoadersAndDexCaches(space, class_loader, dex_caches, added_class_table);
+    bool forward_dex_cache_arrays = false;
+    if (!UpdateAppImageClassLoadersAndDexCaches(space,
+                                                class_loader,
+                                                dex_caches,
+                                                added_class_table,
+                                                /*out*/&forward_dex_cache_arrays,
+                                                /*out*/error_msg)) {
+      return false;
+    }
+    if (added_class_table) {
+      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+      // Update class loader and resolved strings. If added_class_table is false, the resolved
+      // strings were already updated in UpdateAppImageClassLoadersAndDexCaches.
+      UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
+                                                         class_loader.Get(),
+                                                         forward_dex_cache_arrays);
+      class_table->Visit(visitor);
+    }
+    // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
+    // In this case, madvise away the dex cache arrays section of the image to reduce RAM usage and
+    // mark as PROT_NONE to catch any invalid accesses.
+    if (forward_dex_cache_arrays) {
+      const ImageSection& dex_cache_section = header.GetImageSection(
+          ImageHeader::kSectionDexCacheArrays);
+      uint8_t* section_begin = AlignUp(space->Begin() + dex_cache_section.Offset(), kPageSize);
+      uint8_t* section_end = AlignDown(space->Begin() + dex_cache_section.End(), kPageSize);
+      if (section_begin < section_end) {
+        madvise(section_begin, section_end - section_begin, MADV_DONTNEED);
+        mprotect(section_begin, section_end - section_begin, PROT_NONE);
+        VLOG(image) << "Released and protected dex cache array image section from "
+                    << reinterpret_cast<const void*>(section_begin) << "-"
+                    << reinterpret_cast<const void*>(section_end);
+      }
+    }
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
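
The alignment handling above is the crux: only whole pages strictly inside the dex cache array section may be dropped and protected, since the boundary pages can share data with neighbouring sections. A stand-alone sketch of the same pattern (hypothetical helper, simplified from the code above):

  #include <sys/mman.h>

  // Hypothetical helper: release and protect the interior pages of [offset, end_offset).
  void ReleaseAndProtect(uint8_t* image_begin, size_t offset, size_t end_offset) {
    uint8_t* begin = AlignUp(image_begin + offset, kPageSize);      // round the start up
    uint8_t* end = AlignDown(image_begin + end_offset, kPageSize);  // round the end down
    if (begin < end) {
      madvise(begin, end - begin, MADV_DONTNEED);  // drop backing pages to save RAM
      mprotect(begin, end - begin, PROT_NONE);     // fault on any stale access to the old arrays
    }
  }
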
@@ -1677,7 +1771,7 @@
   void Visit(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
-    if (!done_ && class_table != nullptr && !class_table->Visit(visitor_)) {
+    if (!done_ && class_table != nullptr && !class_table->Visit(*visitor_)) {
       // If the visitor ClassTable returns false it means that we don't need to continue.
       done_ = true;
     }
@@ -1690,7 +1784,7 @@
 };
 
 void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
-  if (boot_class_table_.Visit(visitor)) {
+  if (boot_class_table_.Visit(*visitor)) {
     VisitClassLoaderClassesVisitor loader_visitor(visitor);
     VisitClassLoaders(&loader_visitor);
   }
@@ -1713,7 +1807,7 @@
 
 class GetClassesInToVector : public ClassVisitor {
  public:
-  bool Visit(mirror::Class* klass) OVERRIDE {
+  bool operator()(mirror::Class* klass) OVERRIDE {
     classes_.push_back(klass);
     return true;
   }
@@ -1725,7 +1819,7 @@
   explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
       : arr_(arr), index_(0) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     ++index_;
     if (index_ <= arr_->GetLength()) {
       arr_->Set(index_ - 1, klass);
@@ -1746,16 +1840,17 @@
 void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor) {
   // TODO: it may be possible to avoid secondary storage if we iterate over dex caches. The problem
   // is avoiding duplicates.
+  Thread* const self = Thread::Current();
   if (!kMovingClasses) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
     GetClassesInToVector accumulator;
     VisitClasses(&accumulator);
     for (mirror::Class* klass : accumulator.classes_) {
-      if (!visitor->Visit(klass)) {
+      if (!visitor->operator()(klass)) {
         return;
       }
     }
   } else {
-    Thread* const self = Thread::Current();
     StackHandleScope<1> hs(self);
     auto classes = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
     // We size the array assuming classes won't be added to the class table during the visit.
@@ -1783,7 +1878,7 @@
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
       mirror::Class* klass = classes->Get(i);
-      if (klass != nullptr && !visitor->Visit(klass)) {
+      if (klass != nullptr && !visitor->operator()(klass)) {
         return;
       }
     }
@@ -3546,7 +3641,7 @@
 
   // Don't attempt to re-verify if already sufficiently verified.
   if (klass->IsVerified()) {
-    EnsurePreverifiedMethods(klass);
+    EnsureSkipAccessChecksMethods(klass);
     return;
   }
   if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
@@ -3569,22 +3664,10 @@
     mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
   }
 
-  // Skip verification if we are forcing a soft fail.
-  // This has to be before the normal verification enabled check,
-  // since technically verification is disabled in this mode.
-  if (UNLIKELY(Runtime::Current()->IsVerificationSoftFail())) {
-    // Force verification to be a 'soft failure'.
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-    // As this is a fake verified status, make sure the methods are _not_ marked preverified
-    // later.
-    klass->SetPreverified();
-    return;
-  }
-
   // Skip verification if disabled.
   if (!Runtime::Current()->IsVerificationEnabled()) {
     mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-    EnsurePreverifiedMethods(klass);
+    EnsureSkipAccessChecksMethods(klass);
     return;
   }
 
@@ -3687,9 +3770,9 @@
         mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
       } else {
         mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-        // As this is a fake verified status, make sure the methods are _not_ marked preverified
-        // later.
-        klass->SetPreverified();
+        // As this is a fake verified status, make sure the methods are _not_ marked
+        // kAccSkipAccessChecks later.
+        klass->SetVerificationAttempted();
       }
     }
   } else {
@@ -3702,19 +3785,26 @@
   }
   if (preverified || verifier_failure == verifier::MethodVerifier::kNoFailure) {
     // Class is verified so we don't need to do any access check on its methods.
-    // Let the interpreter know it by setting the kAccPreverified flag onto each
+    // Let the interpreter know it by setting the kAccSkipAccessChecks flag onto each
     // method.
     // Note: we're going here during compilation and at runtime. When we set the
-    // kAccPreverified flag when compiling image classes, the flag is recorded
+    // kAccSkipAccessChecks flag when compiling image classes, the flag is recorded
     // in the image and is set when loading the image.
-    EnsurePreverifiedMethods(klass);
+
+    if (UNLIKELY(Runtime::Current()->IsVerificationSoftFail())) {
+      // Never skip access checks if the verification soft fail is forced.
+      // Mark the class as having a verification attempt to avoid re-running the verifier.
+      klass->SetVerificationAttempted();
+    } else {
+      EnsureSkipAccessChecksMethods(klass);
+    }
   }
 }
 
-void ClassLinker::EnsurePreverifiedMethods(Handle<mirror::Class> klass) {
-  if (!klass->IsPreverified()) {
-    klass->SetPreverifiedFlagOnAllMethods(image_pointer_size_);
-    klass->SetPreverified();
+void ClassLinker::EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass) {
+  if (!klass->WasVerificationAttempted()) {
+    klass->SetSkipAccessChecksFlagOnAllMethods(image_pointer_size_);
+    klass->SetVerificationAttempted();
   }
 }
 
@@ -3745,7 +3835,7 @@
   }
 
   // We may be running with a preopted oat file but without image. In this case,
-  // we don't skip verification of preverified classes to ensure we initialize
+  // we don't skip verification of classes marked kAccSkipAccessChecks, to ensure we initialize
   // dex caches with all types resolved during verification.
   // We need to trust image classes, as these might be coming out of a pre-opted, quickened boot
   // image (that we just failed loading), and the verifier can't be run on quickened opcodes when
@@ -3853,8 +3943,9 @@
   }
   DCHECK(klass->GetClass() != nullptr);
   klass->SetObjectSize(sizeof(mirror::Proxy));
-  // Set the class access flags incl. preverified, so we do not try to set the flag on the methods.
-  klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccPreverified);
+  // Set the class access flags incl. VerificationAttempted, so we do not try to set the flag on
+  // the methods.
+  klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccVerificationAttempted);
   klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
   klass->SetName(soa.Decode<mirror::String*>(name));
@@ -4025,9 +4116,10 @@
 
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 19u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 18u);
   ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
       2, image_pointer_size_);
+  DCHECK_EQ(std::string(proxy_constructor->GetName()), "<init>");
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
   // constructor method.
   GetClassRoot(kJavaLangReflectProxy)->GetDexCache()->SetResolvedMethod(
@@ -4662,7 +4754,7 @@
                                     bool can_init_parents) {
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
-    EnsurePreverifiedMethods(c);
+    EnsureSkipAccessChecksMethods(c);
     return true;
   }
   const bool success = InitializeClass(self, c, can_init_fields, can_init_parents);
@@ -6357,11 +6449,11 @@
     for (ArtMethod* def_method : default_methods) {
       ArtMethod& new_method = *out;
       new_method.CopyFrom(def_method, image_pointer_size_);
-      // Clear the preverified flag if it is present. Since this class hasn't been verified yet it
-      // shouldn't have methods that are preverified.
+      // Clear the kAccSkipAccessChecks flag if it is present. Since this class hasn't been
+      // verified yet, it shouldn't have methods that skip access checks.
       // TODO This is rather arbitrary. We should maybe support classes where only some of its
-      // methods are preverified.
-      new_method.SetAccessFlags((new_method.GetAccessFlags() | kAccDefault) & ~kAccPreverified);
+      // methods are skip_access_checks.
+      new_method.SetAccessFlags((new_method.GetAccessFlags() | kAccDefault) & ~kAccSkipAccessChecks);
       move_table.emplace(def_method, &new_method);
       ++out;
     }
@@ -6369,11 +6461,11 @@
       ArtMethod& new_method = *out;
       new_method.CopyFrom(conf_method, image_pointer_size_);
       // This is a type of default method (there are default method impls, just a conflict) so mark
-      // this as a default, non-abstract method, since thats what it is. Also clear the preverified
-      // bit since this class hasn't been verified yet it shouldn't have methods that are
-      // preverified.
+      // this as a default, non-abstract method, since that's what it is. Also clear the
+      // kAccSkipAccessChecks bit: since this class hasn't been verified yet, it shouldn't have
+      // methods that skip access checks.
       constexpr uint32_t kSetFlags = kAccDefault | kAccDefaultConflict;
-      constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccPreverified);
+      constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccSkipAccessChecks);
       new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
       DCHECK(new_method.IsDefaultConflicting());
       // The actual method might or might not be marked abstract since we just copied it from a
@@ -7157,7 +7249,7 @@
  public:
   explicit DumpClassVisitor(int flags) : flags_(flags) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     klass->DumpClass(LOG(ERROR), flags_);
     return true;
   }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 5176cbd..71fcf29 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -60,6 +60,13 @@
 
 enum VisitRootFlags : uint8_t;
 
+class ClassVisitor {
+ public:
+  virtual ~ClassVisitor() {}
+  // Return true to continue visiting.
+  virtual bool operator()(mirror::Class* klass) = 0;
+};
+
 class ClassLoaderVisitor {
  public:
   virtual ~ClassLoaderVisitor() {}
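
Since ClassVisitor now exposes operator() instead of a named Visit method, implementations read like ordinary functors; a minimal sketch of a subclass (hypothetical, in the style of the visitors updated elsewhere in this patch):

  // Hypothetical visitor counting loaded classes through the relocated ClassVisitor interface.
  class CountClassesVisitor : public ClassVisitor {
   public:
    bool operator()(mirror::Class* klass ATTRIBUTE_UNUSED) OVERRIDE
        SHARED_REQUIRES(Locks::mutator_lock_) {
      ++count_;
      return true;  // Keep visiting.
    }
    size_t count_ = 0;
  };
  // Usage: Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
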
@@ -957,9 +964,10 @@
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Ensures that methods have the kAccPreverified bit set. We use the kAccPreverfied bit on the
-  // class access flags to determine whether this has been done before.
-  void EnsurePreverifiedMethods(Handle<mirror::Class> c)
+  // Ensures that methods have the kAccSkipAccessChecks bit set. We use the
+  // kAccVerificationAttempted bit on the class access flags to determine whether this has been done
+  // before.
+  void EnsureSkipAccessChecksMethods(Handle<mirror::Class> c)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::Class* LookupClassFromBootImage(const char* descriptor)
@@ -1007,11 +1015,13 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
-  void UpdateAppImageClassLoadersAndDexCaches(
+  bool UpdateAppImageClassLoadersAndDexCaches(
       gc::space::ImageSpace* space,
       Handle<mirror::ClassLoader> class_loader,
       Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-      bool added_class_table)
+      bool added_class_table,
+      bool* out_forward_dex_cache_array,
+      std::string* out_error_msg)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 40dfda9..3a0f3e5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -210,11 +210,10 @@
                                                klass->GetDescriptor(&temp2)));
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
-      if (klass->NumDirectMethods() == 1) {
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsClassInitializer());
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsDirect());
-      } else {
-        EXPECT_EQ(0U, klass->NumDirectMethods());
+      // Check that all direct methods are static (either <clinit> or a regular static method).
+      for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+        EXPECT_TRUE(m.IsStatic());
+        EXPECT_TRUE(m.IsDirect());
       }
     } else {
       if (!klass->IsSynthetic()) {
@@ -1126,14 +1125,14 @@
 static void CheckMethod(ArtMethod* method, bool verified)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (!method->IsNative() && !method->IsAbstract()) {
-    EXPECT_EQ((method->GetAccessFlags() & kAccPreverified) != 0U, verified)
+    EXPECT_EQ((method->GetAccessFlags() & kAccSkipAccessChecks) != 0U, verified)
         << PrettyMethod(method, true);
   }
 }
 
-static void CheckPreverified(mirror::Class* c, bool preverified)
+static void CheckVerificationAttempted(mirror::Class* c, bool preverified)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  EXPECT_EQ((c->GetAccessFlags() & kAccPreverified) != 0U, preverified)
+  EXPECT_EQ((c->GetAccessFlags() & kAccVerificationAttempted) != 0U, preverified)
       << "Class " << PrettyClass(c) << " not as expected";
   for (auto& m : c->GetMethods(sizeof(void*))) {
     CheckMethod(&m, preverified);
@@ -1147,7 +1146,7 @@
   ASSERT_TRUE(JavaLangObject != nullptr);
   EXPECT_TRUE(JavaLangObject->IsInitialized()) << "Not testing already initialized class from the "
                                                   "core";
-  CheckPreverified(JavaLangObject, true);
+  CheckVerificationAttempted(JavaLangObject, true);
 }
 
 TEST_F(ClassLinkerTest, Preverified_UninitializedBoot) {
@@ -1160,10 +1159,10 @@
   EXPECT_FALSE(security_manager->IsInitialized()) << "Not testing uninitialized class from the "
                                                      "core";
 
-  CheckPreverified(security_manager.Get(), false);
+  CheckVerificationAttempted(security_manager.Get(), false);
 
   class_linker_->EnsureInitialized(soa.Self(), security_manager, true, true);
-  CheckPreverified(security_manager.Get(), true);
+  CheckVerificationAttempted(security_manager.Get(), true);
 }
 
 TEST_F(ClassLinkerTest, Preverified_App) {
@@ -1175,10 +1174,10 @@
   Handle<mirror::Class> statics(
       hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStatics;", class_loader)));
 
-  CheckPreverified(statics.Get(), false);
+  CheckVerificationAttempted(statics.Get(), false);
 
   class_linker_->EnsureInitialized(soa.Self(), statics, true, true);
-  CheckPreverified(statics.Get(), true);
+  CheckVerificationAttempted(statics.Get(), true);
 }
 
 TEST_F(ClassLinkerTest, IsBootStrapClassLoaded) {
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index aef02b6..e512906 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -28,6 +28,9 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
+  for (GcRoot<mirror::Object>& root : dex_files_) {
+    visitor.VisitRoot(root.AddressWithoutBarrier());
+  }
 }
 
 template<class Visitor>
@@ -42,6 +45,19 @@
   }
 }
 
+template <typename Visitor>
+bool ClassTable::Visit(Visitor& visitor) {
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor(root.Read())) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_TABLE_INL_H_
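
Because ClassTable::Visit is now templated over the visitor type, any object with a matching operator() can be passed by reference, and no virtual dispatch is needed; a minimal sketch (hypothetical caller):

  // Hypothetical functor passed directly to the templated ClassTable::Visit.
  class CollectDescriptorsVisitor {
   public:
    bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
      std::string temp;
      descriptors_.push_back(klass->GetDescriptor(&temp));
      return true;  // Returning false stops the visit early.
    }
    std::vector<std::string> descriptors_;
  };

  // Usage (with classlinker_classes_lock_ and mutator_lock_ held):
  //   CollectDescriptorsVisitor visitor;
  //   class_table->Visit(visitor);
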
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 2a4f0e0..afb0556 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -73,17 +73,6 @@
   return existing;
 }
 
-bool ClassTable::Visit(ClassVisitor* visitor) {
-  for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      if (!visitor->Visit(root.Read())) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 size_t ClassTable::NumZygoteClasses() const {
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
@@ -183,12 +172,4 @@
   return read_count;
 }
 
-void ClassTable::SetClassLoader(mirror::ClassLoader* class_loader) {
-  for (const ClassSet& class_set : classes_) {
-    for (const GcRoot<mirror::Class>& root : class_set) {
-      root.Read()->SetClassLoader(class_loader);
-    }
-  }
-}
-
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 0b42035..5f2eb48 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -36,13 +36,6 @@
   class ClassLoader;
 }  // namespace mirror
 
-class ClassVisitor {
- public:
-  virtual ~ClassVisitor() {}
-  // Return true to continue visiting.
-  virtual bool Visit(mirror::Class* klass) = 0;
-};
-
 // Each loader has a ClassTable
 class ClassTable {
  public:
@@ -80,8 +73,9 @@
       NO_THREAD_SAFETY_ANALYSIS
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
-  // Return false if the callback told us to exit.
-  bool Visit(ClassVisitor* visitor)
+  // Stops the visit if the visitor returns false.
+  template <typename Visitor>
+  bool Visit(Visitor& visitor)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
@@ -118,11 +112,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Change the class loader of all the contained classes.
-  void SetClassLoader(mirror::ClassLoader* class_loader)
-    REQUIRES(Locks::classlinker_classes_lock_)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
  private:
   class ClassDescriptorHashEquals {
    public:
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a0f875d..904490a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -983,7 +983,7 @@
  public:
   explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
 
-  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!c->IsPrimitive()) {
       classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
     }
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 0663b7e..7e7d904 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -299,8 +299,10 @@
 }
 
 template<FindFieldType type, bool access_check>
-inline ArtField* FindFieldFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                           Thread* self, size_t expected_size) {
+inline ArtField* FindFieldFromCode(uint32_t field_idx,
+                                   ArtMethod* referrer,
+                                   Thread* self,
+                                   size_t expected_size) REQUIRES(!Roles::uninterruptible_) {
   bool is_primitive;
   bool is_set;
   bool is_static;
@@ -316,7 +318,31 @@
     default:                     is_primitive = true;  is_set = true;  is_static = true;  break;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+
+  ArtField* resolved_field;
+  if (access_check) {
+    // Slow path: According to JLS 13.4.8, a linkage error may occur if the compile-time
+    // qualifying type of a field and the resolved run-time qualifying type of the field differ
+    // in their static-ness.
+    //
+    // In particular, don't assume the dex instruction already correctly knows whether the
+    // real field is static or not; the resolution here must not depend on that assumption.
+    ArtMethod* method = referrer->GetInterfaceMethodIfProxy(sizeof(void*));
+
+    StackHandleScope<2> hs(self);
+    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(method->GetDexCache()));
+    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(method->GetClassLoader()));
+
+    resolved_field = class_linker->ResolveFieldJLS(*method->GetDexFile(),
+                                                   field_idx,
+                                                   h_dex_cache,
+                                                   h_class_loader);
+  } else {
+    // Fast path: Verifier already would've called ResolveFieldJLS and we wouldn't
+    // be executing here if there was a static/non-static mismatch.
+    resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+  }
+
   if (UNLIKELY(resolved_field == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index b5a55bf..3dfad76 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -273,15 +273,15 @@
     if (outer_method != nullptr) {
       const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
       if (current_code->IsOptimized()) {
-          uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-          CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-          StackMapEncoding encoding = code_info.ExtractEncoding();
-          StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-          DCHECK(stack_map.IsValid());
-          if (stack_map.HasInlineInfo(encoding)) {
-            InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-            caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-          }
+        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+        DCHECK(stack_map.IsValid());
+        if (stack_map.HasInlineInfo(encoding)) {
+          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+        }
       }
     }
     if (kIsDebugBuild && do_caller_check) {
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 1850254..a245f18 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -27,7 +27,36 @@
 
 namespace art {
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+inline constexpr bool FindFieldTypeIsRead(FindFieldType type) {
+  return type == InstanceObjectRead ||
+         type == InstancePrimitiveRead ||
+         type == StaticObjectRead ||
+         type == StaticPrimitiveRead;
+}
+
+// Helper function to do a null check after trying to resolve the field. Not for statics, since
+// there is no obj for them. This may suspend; obj is a double pointer so the caller's value can
+// be updated if the object moves.
+template<FindFieldType type, bool kAccessCheck>
+ALWAYS_INLINE static inline ArtField* FindInstanceField(uint32_t field_idx,
+                                                        ArtMethod* referrer,
+                                                        Thread* self,
+                                                        size_t size,
+                                                        mirror::Object** obj)
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(obj));
+  ArtField* field = FindFieldFromCode<type, kAccessCheck>(field_idx, referrer, self, size);
+  if (LIKELY(field != nullptr) && UNLIKELY(h.Get() == nullptr)) {
+    ThrowNullPointerExceptionForFieldAccess(field, /*is_read*/FindFieldTypeIsRead(type));
+    return nullptr;
+  }
+  return field;
+}
+
+extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
+                                           ArtMethod* referrer,
                                            Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -42,7 +71,8 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
+                                               ArtMethod* referrer,
                                                Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -57,7 +87,8 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer,
+extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
+                                             ArtMethod* referrer,
                                              Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -125,12 +156,16 @@
                                                    Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
-  field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
+  field = FindFieldFromCode<StaticObjectRead, true>(field_idx,
+                                                    referrer,
+                                                    self,
                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
@@ -138,149 +173,159 @@
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetByte(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetByte(obj);
-    }
+    return field->GetByte(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                 ArtMethod* referrer, Thread* self)
+extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+                                                 mirror::Object* obj,
+                                                 ArtMethod* referrer,
+                                                 Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetBoolean(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int8_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int8_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetBoolean(obj);
-    }
+    return field->GetBoolean(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx,
+                                               mirror::Object* obj,
+                                               ArtMethod* referrer,
+                                               Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetShort(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetShort(obj);
-    }
+    return field->GetShort(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                               ArtMethod* referrer, Thread* self)
+extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx,
+                                               mirror::Object* obj,
+                                               ArtMethod* referrer,
+                                               Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetChar(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int16_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int16_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetChar(obj);
-    }
+    return field->GetChar(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get32(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int32_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int32_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get32(obj);
-    }
+    return field->Get32(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             ArtMethod* referrer, Thread* self)
+extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int64_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get64(obj);
   }
-  field = FindFieldFromCode<InstancePrimitiveRead, true>(field_idx, referrer, self,
-                                                         sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveRead, true>(field_idx,
+                                                         referrer,
+                                                         self,
+                                                         sizeof(int64_t),
+                                                         &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->Get64(obj);
-    }
+    return field->Get64(obj);
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx,
+                                                     mirror::Object* obj,
                                                      ArtMethod* referrer,
                                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectRead,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetObj(obj);
   }
-  field = FindFieldFromCode<InstanceObjectRead, true>(
-      field_idx, referrer, self, sizeof(mirror::HeapReference<mirror::Object>));
+  field = FindInstanceField<InstanceObjectRead, true>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      sizeof(mirror::HeapReference<mirror::Object>),
+                                                      &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, true);
-    } else {
-      return field->GetObj(obj);
-    }
+    return field->GetObj(obj);
   }
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int artSet8StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                     ArtMethod* referrer, Thread* self)
+extern "C" int artSet8StaticFromCode(uint32_t field_idx,
+                                     uint32_t new_value,
+                                     ArtMethod* referrer,
+                                     Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int8_t));
@@ -310,8 +355,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet16StaticFromCode(uint32_t field_idx, uint16_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet16StaticFromCode(uint32_t field_idx,
+                                      uint16_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int16_t));
@@ -341,8 +388,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      ArtMethod* referrer, Thread* self)
+extern "C" int artSet32StaticFromCode(uint32_t field_idx,
+                                      uint32_t new_value,
+                                      ArtMethod* referrer,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int32_t));
@@ -360,8 +409,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64StaticFromCode(uint32_t field_idx, ArtMethod* referrer,
-                                      uint64_t new_value, Thread* self)
+extern "C" int artSet64StaticFromCode(uint32_t field_idx,
+                                      ArtMethod* referrer,
+                                      uint64_t new_value,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -379,11 +430,15 @@
   return -1;  // failure
 }
 
-extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSetObjStaticFromCode(uint32_t field_idx,
+                                       mirror::Object* new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  StaticObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     if (LIKELY(!field->IsPrimitiveType())) {
@@ -392,8 +447,15 @@
       return 0;  // success
     }
   }
-  field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
-                                                     sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<StaticObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(field->GetDeclaringClass(), new_value);
@@ -402,8 +464,11 @@
   return -1;  // failure
 }
 
-extern "C" int artSet8InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                       ArtMethod* referrer, Thread* self)
+extern "C" int artSet8InstanceFromCode(uint32_t field_idx,
+                                       mirror::Object* obj,
+                                       uint8_t new_value,
+                                       ArtMethod* referrer,
+                                       Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
@@ -418,31 +483,29 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int8_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int8_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimBoolean) {
-        field->SetBoolean<false>(obj, new_value);
-      } else {
-        field->SetByte<false>(obj, new_value);
-      }
-      return 0;  // success
+      field->SetByte<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet16InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet16InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint16_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int16_t));
@@ -457,32 +520,30 @@
     }
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int16_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int16_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    // Compiled code can't use transactional mode.
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
     } else {
-      Primitive::Type type = field->GetTypeAsPrimitiveType();
-      // Compiled code can't use transactional mode.
-      if (type == Primitive::kPrimChar) {
-        field->SetChar<false>(obj, new_value);
-      } else {
-        DCHECK_EQ(Primitive::kPrimShort, type);
-        field->SetShort<false>(obj, new_value);
-      }
-      return 0;  // success
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
     }
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet32InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint32_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int32_t));
@@ -491,26 +552,24 @@
     field->Set32<false>(obj, new_value);
     return 0;  // success
   }
-  {
-    StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                            sizeof(int32_t));
-  }
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int32_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set32<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
-                                        ArtMethod* referrer, Thread* self)
+extern "C" int artSet64InstanceFromCode(uint32_t field_idx,
+                                        mirror::Object* obj,
+                                        uint64_t new_value,
+                                        ArtMethod* referrer,
+                                        Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int64_t));
@@ -519,34 +578,45 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                          sizeof(int64_t));
+  field = FindInstanceField<InstancePrimitiveWrite, true>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          sizeof(int64_t),
+                                                          &obj);
   if (LIKELY(field != nullptr)) {
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(field, false);
-    } else {
-      // Compiled code can't use transactional mode.
-      field->Set64<false>(obj, new_value);
-      return 0;  // success
-    }
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(obj, new_value);
+    return 0;  // success
   }
   return -1;  // failure
 }
 
-extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
+extern "C" int artSetObjInstanceFromCode(uint32_t field_idx,
+                                         mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         ArtMethod* referrer, Thread* self)
+                                         ArtMethod* referrer,
+                                         Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+  ArtField* field = FindFieldFast(field_idx,
+                                  referrer,
+                                  InstanceObjectWrite,
                                   sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(obj, new_value);
     return 0;  // success
   }
-  field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
-                                                       sizeof(mirror::HeapReference<mirror::Object>));
+  {
+    StackHandleScope<2> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+    HandleWrapper<mirror::Object> h_new_value(hs.NewHandleWrapper(&new_value));
+    field = FindFieldFromCode<InstanceObjectWrite, true>(
+        field_idx,
+        referrer,
+        self,
+        sizeof(mirror::HeapReference<mirror::Object>));
+  }
   if (LIKELY(field != nullptr)) {
     if (UNLIKELY(obj == nullptr)) {
       ThrowNullPointerExceptionForFieldAccess(field, false);
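Note on the pattern used throughout the setter entrypoints above: before calling into FindFieldFromCode (either directly or via the new FindInstanceField helper, which takes the address of the object pointer), raw mirror::Object* arguments are protected by a StackHandleScope/HandleWrapper. Field resolution can allocate and suspend the thread, so a moving GC may relocate the object mid-call; the wrapper registers the local pointer as a GC root and writes the updated address back when the scope closes. A minimal sketch of that idea with simplified, hypothetical types (HandleWrapperSketch and ResolveFieldSlowPath are stand-ins, not the ART API):

    struct Object { int payload; };

    // Stand-in for a GC that may move objects while the thread is suspended.
    struct Gc {
      static Object* MaybeMove(Object* obj) { return obj; }  // identity in this sketch
    };

    // Stand-in for StackHandleScope/HandleWrapper: remembers the address of a local
    // pointer so it can be patched after any call that may have moved the object.
    class HandleWrapperSketch {
     public:
      explicit HandleWrapperSketch(Object** root) : root_(root) {}
      ~HandleWrapperSketch() { *root_ = Gc::MaybeMove(*root_); }  // write back on scope exit
     private:
      Object** root_;
    };

    bool ResolveFieldSlowPath() { return true; }  // may allocate, suspend, and move objects

    int SetFieldEntrypointSketch(Object* obj) {
      {
        HandleWrapperSketch h_obj(&obj);   // keep 'obj' valid across the slow path
        if (!ResolveFieldSlowPath()) {
          return -1;
        }
      }
      // 'obj' now refers to the (possibly relocated) object and is safe to use.
      return obj != nullptr ? 0 : -1;
    }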
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5345b89..5c5abeb 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -349,7 +349,7 @@
   // Check that the class pointer inside the object is not null and is aligned.
   // TODO: Method might be not a heap address, and GetClass could fault.
   // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = method_obj->GetDeclaringClassNoBarrier();
+  mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 9397c35..8e1b7f4 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1081,7 +1081,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index c8e913c..ae41226 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -42,6 +42,8 @@
   kCollectorTypeCC,
   // Instrumentation critical section fake collector.
   kCollectorTypeInstrumentation,
+  // Fake collector for adding or removing application image spaces.
+  kCollectorTypeAddRemoveAppImageSpace,
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 84243df..679432b 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -34,6 +34,7 @@
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
     case kGcCauseTrim: return "HeapTrim";
     case kGcCauseInstrumentation: return "Instrumentation";
+    case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 34c7766..c6b505c 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -41,6 +41,8 @@
   kGcCauseTrim,
   // Not a real GC cause, used to implement exclusion between GC and instrumentation.
   kGcCauseInstrumentation,
+  // Not a real GC cause, used to add or remove app image spaces.
+  kGcCauseAddRemoveAppImageSpace,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
 };
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d76a8d1..8269f76 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -271,7 +271,7 @@
     // The loaded spaces. Secondary images may fail to load, in which case we need to remove
     // already added spaces.
     std::vector<space::Space*> added_image_spaces;
-
+    uint8_t* const original_requested_alloc_space_begin = requested_alloc_space_begin;
     for (size_t index = 0; index < image_file_names.size(); ++index) {
       std::string& image_name = image_file_names[index];
       ATRACE_BEGIN("ImageSpace::Create");
@@ -317,7 +317,10 @@
         // Remove already loaded spaces.
         for (space::Space* loaded_space : added_image_spaces) {
           RemoveSpace(loaded_space);
+          delete loaded_space;
         }
+        boot_image_spaces_.clear();
+        requested_alloc_space_begin = original_requested_alloc_space_begin;
         break;
       }
     }
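The heap.cc hunk above makes multi-image loading all-or-nothing: when a secondary app image fails to load, every space added so far is removed from the heap and freed, boot_image_spaces_ is cleared, and the requested allocation base is restored so a later retry starts from a clean slate. A small sketch of that load-or-roll-back shape (Space, LoadSpace, and RemoveSpace are placeholders, not the ART heap API):

    #include <string>
    #include <vector>

    struct Space { std::string name; };

    static Space* LoadSpace(const std::string& name) {
      return name.empty() ? nullptr : new Space{name};  // empty name simulates a failure
    }
    static void RemoveSpace(Space* space) { (void)space; /* detach from the heap */ }

    static bool LoadAllOrNothing(const std::vector<std::string>& names,
                                 std::vector<Space*>* out) {
      std::vector<Space*> added;
      for (const std::string& name : names) {
        Space* space = LoadSpace(name);
        if (space == nullptr) {
          for (Space* loaded : added) {  // roll back everything loaded so far
            RemoveSpace(loaded);
            delete loaded;
          }
          return false;                  // caller's state is exactly as before the loop
        }
        added.push_back(space);
      }
      out->insert(out->end(), added.begin(), added.end());
      return true;
    }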
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 8356814..e172f85 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -93,7 +93,7 @@
       // in the heap causing corruption since this field would get swept.
       if (collector_->IsMarkedHeapReference(referent_addr)) {
         if (!preserving_references_ ||
-           (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
+           (LIKELY(!reference->IsFinalizerReferenceInstance()) && reference->IsUnprocessed())) {
           return referent_addr->AsMirrorPtr();
         }
       }
@@ -275,7 +275,7 @@
   // GC queues, but since we hold the lock finalizer_reference_queue_ lock it also prevents this
   // race.
   MutexLock mu2(self, *Locks::reference_queue_finalizer_references_lock_);
-  if (!reference->IsEnqueued()) {
+  if (reference->IsUnprocessed()) {
     CHECK(reference->IsFinalizerReferenceInstance());
     reference->SetPendingNext(reference);
     return true;
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 67dcc2d..03ab9a1 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -32,42 +32,37 @@
 void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
   DCHECK(ref != nullptr);
   MutexLock mu(self, *lock_);
-  if (!ref->IsEnqueued()) {
-    EnqueuePendingReference(ref);
+  if (ref->IsUnprocessed()) {
+    EnqueueReference(ref);
   }
 }
 
 void ReferenceQueue::EnqueueReference(mirror::Reference* ref) {
-  CHECK(ref->IsEnqueuable());
-  EnqueuePendingReference(ref);
-}
-
-void ReferenceQueue::EnqueuePendingReference(mirror::Reference* ref) {
   DCHECK(ref != nullptr);
+  CHECK(ref->IsUnprocessed());
   if (IsEmpty()) {
     // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
     mirror::Reference* head = list_->GetPendingNext();
+    DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
+  // Add the reference in the middle to preserve the cycle.
   list_->SetPendingNext(ref);
 }
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* head = list_->GetPendingNext();
-  DCHECK(head != nullptr);
-  mirror::Reference* ref;
+  mirror::Reference* ref = list_->GetPendingNext();
+  DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
-  if (list_ == head) {
-    ref = list_;
+  if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = head->GetPendingNext();
+    mirror::Reference* next = ref->GetPendingNext();
     list_->SetPendingNext(next);
-    ref = head;
   }
   ref->SetPendingNext(nullptr);
   Heap* heap = Runtime::Current()->GetHeap();
@@ -152,9 +147,7 @@
       } else {
         ref->ClearReferent<false>();
       }
-      if (ref->IsEnqueuable()) {
-        cleared_references->EnqueuePendingReference(ref);
-      }
+      cleared_references->EnqueueReference(ref);
     }
   }
 }
@@ -167,8 +160,6 @@
     if (referent_addr->AsMirrorPtr() != nullptr &&
         !collector->IsMarkedHeapReference(referent_addr)) {
       mirror::Object* forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
-      // If the referent is non-null the reference must queuable.
-      DCHECK(ref->IsEnqueuable());
       // Move the updated referent to the zombie field.
       if (Runtime::Current()->IsActiveTransaction()) {
         ref->SetZombie<true>(forward_address);
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index aabac97..04d3454 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -44,27 +44,24 @@
 class Heap;
 
 // Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
-// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
-// java.lang.ref.Reference objects.
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained as an unordered,
+// circular, and singly-linked list using the pendingNext fields of the java.lang.ref.Reference
+// objects.
 class ReferenceQueue {
  public:
   explicit ReferenceQueue(Mutex* lock);
 
-  // Enqueue a reference if is not already enqueued. Thread safe to call from multiple threads
-  // since it uses a lock to avoid a race between checking for the references presence and adding
-  // it.
+  // Enqueue a reference if it is unprocessed. Thread safe to call from multiple
+  // threads since it uses a lock to avoid a race between checking for the reference's presence and
+  // adding it.
   void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*lock_);
 
-  // Enqueue a reference, unlike EnqueuePendingReference, enqueue reference checks that the
-  // reference IsEnqueueable. Not thread safe, used when mutators are paused to minimize lock
-  // overhead.
+  // Enqueue a reference. The reference must be unprocessed.
+  // Not thread safe, used when mutators are paused to minimize lock overhead.
   void EnqueueReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Enqueue a reference without checking that it is enqueable.
-  void EnqueuePendingReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Dequeue the first reference (returns list_).
+  // Dequeue a reference from the queue and return that dequeued reference.
   mirror::Reference* DequeuePendingReference() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index dc23afe..35bf718 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -41,19 +41,22 @@
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  // FIFO ordering.
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 2U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref2.Get());
+
+  std::set<mirror::Reference*> refs = {ref1.Get(), ref2.Get()};
+  std::set<mirror::Reference*> dequeued;
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref1.Get());
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_EQ(queue.GetLength(), 0U);
   ASSERT_TRUE(queue.IsEmpty());
+  ASSERT_EQ(refs, dequeued);
 }
 
 TEST_F(ReferenceQueueTest, Dump) {
@@ -75,9 +78,9 @@
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(finalizer_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   queue.Dump(LOG(INFO));
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   queue.Dump(LOG(INFO));
 }
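As the updated reference_queue comments above describe, the pending-reference queue is a circular, singly-linked list threaded through each Reference's pendingNext field: an empty queue is list_ == nullptr, a single element points at itself, new elements are spliced in just after list_, and DequeuePendingReference pops list_->pendingNext and resets its pendingNext to null (the "unprocessed" state the new checks test for). A standalone sketch of the same structure (Node is illustrative, not the mirror::Reference layout):

    #include <cassert>
    #include <cstdio>

    // Illustrative node; in ART the link is java.lang.ref.Reference.pendingNext.
    struct Node {
      explicit Node(int id_in) : id(id_in) {}
      int id;
      Node* pending_next = nullptr;
    };

    struct CircularQueueSketch {
      Node* list_ = nullptr;  // list_->pending_next is the element dequeued next

      void Enqueue(Node* ref) {
        assert(ref->pending_next == nullptr);       // must be unprocessed
        if (list_ == nullptr) {
          list_ = ref;                              // becomes a one-element cycle below
        } else {
          ref->pending_next = list_->pending_next;  // new element points at the old head
        }
        list_->pending_next = ref;                  // close (or create) the cycle
      }

      Node* Dequeue() {
        assert(list_ != nullptr);
        Node* ref = list_->pending_next;
        if (list_ == ref) {
          list_ = nullptr;                          // removed the last element
        } else {
          list_->pending_next = ref->pending_next;  // unlink the head
        }
        ref->pending_next = nullptr;                // back to the unprocessed state
        return ref;
      }
    };

    int main() {
      Node a(1), b(2);
      CircularQueueSketch q;
      q.Enqueue(&a);
      q.Enqueue(&b);
      std::printf("%d %d\n", q.Dequeue()->id, q.Dequeue()->id);  // prints "2 1"
      return 0;
    }

Dequeue order is not FIFO (the sketch prints "2 1"), which is why the reference_queue_test.cc change above compares the dequeued references as a set instead of asserting an exact order.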
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 998db52..9269339 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -867,20 +867,20 @@
     if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
       mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
-      klass->FixupNativePointers(klass, sizeof(void*), visitor);
+      klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, sizeof(void*), visitor);
       // Deal with the arrays.
       mirror::PointerArray* vtable = klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
       if (vtable != nullptr) {
-        vtable->Fixup(vtable, sizeof(void*), visitor);
+        vtable->Fixup<kVerifyNone, kWithoutReadBarrier>(vtable, sizeof(void*), visitor);
       }
       mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
       if (iftable != nullptr) {
-        for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-          if (iftable->GetMethodArrayCount(i) > 0) {
+        for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
+          if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
             DCHECK(methods != nullptr);
-            methods->Fixup(methods, sizeof(void*), visitor);
+            methods->Fixup<kVerifyNone, kWithoutReadBarrier>(methods, sizeof(void*), visitor);
           }
         }
       }
@@ -925,7 +925,7 @@
     if (fixup_heap_objects_) {
       method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this));
     }
-    method->UpdateEntrypoints(ForwardCodeAdapter(this));
+    method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this));
   }
 
  private:
@@ -1014,6 +1014,7 @@
     // Nothing to fix up.
     return true;
   }
+  ScopedDebugDisallowReadBarriers sddrb(Thread::Current());
   // Need to update the image to be at the target base.
   const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
   uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
@@ -1039,7 +1040,7 @@
     CHECK_EQ(image_header.GetImageBegin(), target_base);
     // Fix up dex cache DexFile pointers.
     auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
-        AsObjectArray<mirror::DexCache>();
+        AsObjectArray<mirror::DexCache, kVerifyNone, kWithoutReadBarrier>();
     for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
       mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i);
       // Fix up dex cache pointers.
@@ -1047,7 +1048,7 @@
       if (strings != nullptr) {
         GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings);
         if (strings != new_strings) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::StringsOffset(), new_strings);
+          dex_cache->SetStrings(new_strings);
         }
         dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
       }
@@ -1055,7 +1056,7 @@
       if (types != nullptr) {
         GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
         if (types != new_types) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedTypesOffset(), new_types);
+          dex_cache->SetResolvedTypes(new_types);
         }
         dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
       }
@@ -1063,7 +1064,7 @@
       if (methods != nullptr) {
         ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
         if (methods != new_methods) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedMethodsOffset(), new_methods);
+          dex_cache->SetResolvedMethods(new_methods);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
           ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, sizeof(void*));
@@ -1077,7 +1078,7 @@
       if (fields != nullptr) {
         ArtField** new_fields = fixup_adapter.ForwardObject(fields);
         if (fields != new_fields) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedFieldsOffset(), new_fields);
+          dex_cache->SetResolvedFields(new_fields);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
           ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, sizeof(void*));
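The image_space.cc changes route every native pointer in the dex caches, vtables, and iftables through a forwarding adapter while read barriers are disallowed. Conceptually, the adapter shifts any pointer that lies inside the originally mapped image range by the load delta and leaves everything else untouched; a sketch of that idea under that assumption (the class and method names here are illustrative, not the ART adapter's real interface):

    #include <cstdint>
    #include <cstdio>

    // Sketch: forward pointers that fall inside the originally-mapped image range by
    // 'delta' bytes; leave all other pointers unchanged. Names are illustrative.
    class ForwardingAdapterSketch {
     public:
      ForwardingAdapterSketch(uintptr_t source_begin, size_t size, intptr_t delta)
          : source_begin_(source_begin), size_(size), delta_(delta) {}

      template <typename T>
      T* Forward(T* ptr) const {
        uintptr_t raw = reinterpret_cast<uintptr_t>(ptr);
        if (ptr == nullptr || raw < source_begin_ || raw >= source_begin_ + size_) {
          return ptr;                                  // not in the image: leave as-is
        }
        return reinterpret_cast<T*>(raw + delta_);     // shift by the image load delta
      }

     private:
      uintptr_t source_begin_;
      size_t size_;
      intptr_t delta_;
    };

    int main() {
      ForwardingAdapterSketch adapter(/*source_begin=*/0x1000, /*size=*/0x1000, /*delta=*/0x100);
      int* inside = reinterpret_cast<int*>(0x1800);
      int* outside = reinterpret_cast<int*>(0x4000);
      std::printf("%p %p\n", (void*)adapter.Forward(inside), (void*)adapter.Forward(outside));
      return 0;
    }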
diff --git a/runtime/image.cc b/runtime/image.cc
index de00343..1f54e3e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '6', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '7', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index c57b1bb..7484635 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -55,7 +55,7 @@
   explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
     instrumentation_->InstallStubsForClass(klass);
     return true;  // we visit all classes.
   }
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index e93bbdb..0b2471b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -240,7 +240,7 @@
 }
 
 #if !defined(__clang__)
-#if (defined(__arm__) || defined(__i386__))
+#if (defined(__arm__) || defined(__i386__) || defined(__aarch64__))
 // TODO: remove when all targets implemented.
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
@@ -248,7 +248,7 @@
 #endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-#if (defined(__arm__) || defined(__i386__))
+#if (defined(__arm__) || defined(__i386__) || defined(__aarch64__))
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
@@ -311,7 +311,7 @@
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
-  if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
+  if (LIKELY(shadow_frame.GetMethod()->SkipAccessChecks())) {
     // Enter the "without access check" interpreter.
     if (kInterpreterImplKind == kMterpImplKind) {
       if (transaction_active) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 6c9cc70..09d8601 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -637,17 +637,25 @@
         self, new_shadow_frame, StackedShadowFrameType::kShadowFrameUnderConstruction);
     self->EndAssertNoThreadSuspension(old_cause);
 
+    // ArtMethod here is needed to check type information of the call site against the callee.
+    // Type information is retrieved from a DexFile/DexCache for that respective declared method.
+    //
+    // As a special case for proxy methods, which are not dex-backed,
+    // we have to retrieve type information from the proxy's interface method
+    // instead (which is dex-backed, since an interface is never itself a proxy).
+    ArtMethod* method = new_shadow_frame->GetMethod()->GetInterfaceMethodIfProxy(sizeof(void*));
+
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
-    const DexFile::TypeList* params = new_shadow_frame->GetMethod()->GetParameterTypeList();
+    const DexFile::TypeList* params = method->GetParameterTypeList();
     uint32_t shorty_len = 0;
-    const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len);
+    const char* shorty = method->GetShorty(&shorty_len);
 
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
 
-    if (!new_shadow_frame->GetMethod()->IsStatic()) {
+    if (!method->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
@@ -667,7 +675,7 @@
           if (do_assignability_check && o != nullptr) {
             size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
             Class* arg_type =
-                new_shadow_frame->GetMethod()->GetClassFromTypeIndex(
+                method->GetClassFromTypeIndex(
                     params->GetTypeItem(shorty_pos).type_idx_, true /* resolve */, pointer_size);
             if (arg_type == nullptr) {
               CHECK(self->IsExceptionPending());
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 940d344..ca8598e 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -21,6 +21,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -63,10 +64,15 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BRANCH_INSTRUMENTATION(offset) \
-  do { \
+#define BRANCH_INSTRUMENTATION(offset)                                                            \
+  do {                                                                                            \
+    ArtMethod* method = shadow_frame.GetMethod();                                                 \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+    instrumentation->Branch(self, method, dex_pc, offset);                                        \
+    JValue result;                                                                                \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {             \
+      return result;                                                                              \
+    }                                                                                             \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index f606978..25dbab2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -17,6 +17,7 @@
 #include "base/stl_util.h"  // MakeUnique
 #include "experimental_flags.h"
 #include "interpreter_common.h"
+#include "jit/jit.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -69,9 +70,14 @@
     }                                                                                           \
   } while (false)
 
-#define BRANCH_INSTRUMENTATION(offset) \
-  do { \
-    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+#define BRANCH_INSTRUMENTATION(offset)                                                         \
+  do {                                                                                         \
+    ArtMethod* method = shadow_frame.GetMethod();                                              \
+    instrumentation->Branch(self, method, dex_pc, offset);                                     \
+    JValue result;                                                                             \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
+      return result;                                                                           \
+    }                                                                                          \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
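Both interpreter flavors now run the same hook on every branch: report the branch to instrumentation, then ask the JIT whether compiled code is available for on-stack replacement at this (method, dex_pc, offset); if it is, the interpreter abandons its frame and returns the value produced by the compiled code. A simplified sketch of that control flow (only MaybeDoOnStackReplacement mirrors the call in the diff; everything else is a stand-in):

    // Sketch of the logic the new BRANCH_INSTRUMENTATION macro expands to.
    struct JValue { long j = 0; };

    // Stand-in for jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result):
    // returns true when the frame was handed over to OSR-compiled code, which ran the rest of
    // the method and left its return value in *result.
    static bool MaybeDoOnStackReplacement(int dex_pc, int offset, JValue* result) {
      (void)dex_pc; (void)offset; (void)result;
      return false;  // no OSR code available in this sketch
    }

    static void ReportBranch(int dex_pc, int offset) { (void)dex_pc; (void)offset; }

    // Called by the interpreter for every branch instruction. A true return with a filled
    // 'result' means "stop interpreting and return 'result' to the caller".
    static bool OnBranch(int dex_pc, int offset, JValue* result) {
      ReportBranch(dex_pc, offset);                          // instrumentation->Branch(...)
      return MaybeDoOnStackReplacement(dex_pc, offset, result);
    }

    int main() {
      JValue result;
      return OnBranch(/*dex_pc=*/10, /*offset=*/-4, &result) ? 1 : 0;
    }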
diff --git a/runtime/interpreter/mterp/arm64/alt_stub.S b/runtime/interpreter/mterp/arm64/alt_stub.S
new file mode 100644
index 0000000..9b8b16d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/alt_stub.S
@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (${opnum} * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
new file mode 100644
index 0000000..ecab2ce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -0,0 +1,37 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for "if-le" you
+     * would use "le".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, ${condition}    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
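In the active (non-MTERP_SUSPEND) path of bincmp.S the next-PC delta is computed without a branch: w1 holds the taken offset in 16-bit code units, w0 holds 2 (the width of the if-cmp instruction itself), csel picks one based on the comparison, and the adds doubles it into bytes while setting flags so that a negative result (a backward branch) drops into the suspend check. A scalar C++ sketch of that selection (names are illustrative):

    #include <cstdio>

    // Sketch of the branchless offset selection in bincmp.S. Offsets are in 16-bit
    // code units; a backward branch (negative byte offset) triggers a suspend check.
    static int NextPcDeltaBytes(int va, int vb, int branch_offset_code_units,
                                bool* needs_suspend_check) {
      const int kNotTakenCodeUnits = 2;  // size of the if-cmp instruction itself
      bool taken = (va == vb);           // e.g. if-eq; other opcodes use other predicates
      int code_units = taken ? branch_offset_code_units : kNotTakenCodeUnits;
      int bytes = code_units * 2;        // "adds w2, w1, w1"
      *needs_suspend_check = bytes < 0;  // backward branches poll for suspension
      return bytes;
    }

    int main() {
      bool suspend = false;
      std::printf("%d %d\n", NextPcDeltaBytes(1, 1, -10, &suspend), suspend);  // -20 1
      return 0;
    }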
diff --git a/runtime/interpreter/mterp/arm64/binop.S b/runtime/interpreter/mterp/arm64/binop.S
new file mode 100644
index 0000000..b629b0b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop.S
@@ -0,0 +1,33 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binop2addr.S b/runtime/interpreter/mterp/arm64/binop2addr.S
new file mode 100644
index 0000000..a480a7d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop2addr.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopLit16.S b/runtime/interpreter/mterp/arm64/binopLit16.S
new file mode 100644
index 0000000..4f9d205
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit16.S
@@ -0,0 +1,28 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
new file mode 100644
index 0000000..326c657
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if $chkzero
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopWide.S b/runtime/interpreter/mterp/arm64/binopWide.S
new file mode 100644
index 0000000..9de24f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide.S
@@ -0,0 +1,30 @@
+%default {"preinstr":"", "instr":"add x0, x1, x2", "result":"x0", "r1":"x1", "r2":"x2", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE $r2, w2               // w2<- vCC
+    GET_VREG_WIDE $r1, w1               // w1<- vBB
+    .if $chkzero
+    cbz     $r2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr
+    $instr                              // $result<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE $result, w4           // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/binopWide2addr.S b/runtime/interpreter/mterp/arm64/binopWide2addr.S
new file mode 100644
index 0000000..d9927a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide2addr.S
@@ -0,0 +1,29 @@
+%default {"preinstr":"", "instr":"add x0, x0, x1", "r0":"x0", "r1":"x1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE $r1, w1               // x1<- vB
+    GET_VREG_WIDE $r0, w2               // x0<- vA
+    .if $chkzero
+    cbz     $r1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $preinstr
+    $instr                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE $r0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
new file mode 100644
index 0000000..f9073ab
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ *
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xIBASE, xREFS, [sp, #-64]!
+    stp     xSELF, xINST, [sp, #16]
+    stp     xPC, xFP, [sp, #32]
+    stp     fp, lr, [sp, #48]
+    add     fp, sp, #48
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs[] in the shadow frame
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          // load wINST from rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */
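ExecuteMterpImpl derives all of its "named" registers from ShadowFrame fields addressed by the SHADOWFRAME_*_OFFSET constants: the result-register and code-item slots are stashed for later, xFP is aimed at the vreg array, xREFS at the parallel reference array that follows it, and xPC at the current dex instruction inside the code item. A rough C++ picture of the fields involved (this is only an illustration of the layout the offsets imply, not the real ShadowFrame declaration):

    #include <cstdint>

    struct JValue;
    struct CodeItemSketch {
      // ... header fields ...
      uint16_t insns[1];                 // dex bytecode; xPC = &insns[dex_pc]
    };

    // Illustrative only: the fields ExecuteMterpImpl reads via SHADOWFRAME_*_OFFSET.
    struct ShadowFrameSketch {
      JValue* result_register;           // SHADOWFRAME_RESULT_REGISTER_OFFSET
      const CodeItemSketch* code_item;   // SHADOWFRAME_CODE_ITEM_OFFSET
      uint32_t number_of_vregs;          // SHADOWFRAME_NUMBER_OF_VREGS_OFFSET
      uint32_t dex_pc;                   // SHADOWFRAME_DEX_PC_OFFSET
      uint32_t vregs[1];                 // SHADOWFRAME_VREGS_OFFSET: xFP points here;
                                         // xREFS = vregs + number_of_vregs (reference mirror)
    };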
diff --git a/runtime/interpreter/mterp/arm64/fallback.S b/runtime/interpreter/mterp/arm64/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
diff --git a/runtime/interpreter/mterp/arm64/fbinop.S b/runtime/interpreter/mterp/arm64/fbinop.S
new file mode 100644
index 0000000..926d078
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop.S
@@ -0,0 +1,19 @@
+%default {}
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    $instr                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
new file mode 100644
index 0000000..0d57cbf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    $instr                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
new file mode 100644
index 0000000..a45e789
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fcmp.S
@@ -0,0 +1,20 @@
+%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"}
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG$wide $r1, w2
+    GET_VREG$wide $r2, w3
+    mov     w0, #$default_val
+    fcmp $r1, $r2
+    csneg w0, w0, w0, $cond
+    csel w0, wzr, w0, eq
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
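fcmp.S implements the Dalvik cmpl/cmpg compare family: the destination vreg receives -1, 0, or 1 for an ordered comparison, and the template's default_val/cond parameters decide whether an unordered (NaN) comparison produces -1 (the cmpl flavor) or +1 (the cmpg flavor). An equivalent scalar sketch in C++:

    #include <cmath>
    #include <cstdio>

    // Sketch of the cmpl/cmpg semantics the fcmp template encodes with fcmp/csneg/csel.
    // 'gt_bias' corresponds to choosing default_val = +1 (cmpg) instead of -1 (cmpl).
    static int CompareFloats(float vbb, float vcc, bool gt_bias) {
      if (std::isnan(vbb) || std::isnan(vcc)) {
        return gt_bias ? 1 : -1;   // unordered: NaN yields the bias value
      }
      if (vbb == vcc) return 0;
      return vbb > vcc ? 1 : -1;
    }

    int main() {
      std::printf("%d %d %d\n",
                  CompareFloats(1.0f, 2.0f, /*gt_bias=*/false),   // -1
                  CompareFloats(2.0f, 2.0f, /*gt_bias=*/false),   //  0
                  CompareFloats(NAN,  2.0f, /*gt_bias=*/true));   //  1
      return 0;
    }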
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
new file mode 100644
index 0000000..b360539
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -0,0 +1,170 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a pending suspend request.  Assumes wINST is already loaded and xPC has been
+ * advanced; the thread flags are preloaded into w7.  We still need to extract the opcode
+ * and branch to its handler.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+    ldp     fp, lr, [sp, #48]
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
new file mode 100644
index 0000000..9f5ad1e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -0,0 +1,18 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
new file mode 100644
index 0000000..411396b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: double-to-int, double-to-float, long-to-float
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
new file mode 100644
index 0000000..d83b39c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
new file mode 100644
index 0000000..50a73f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG $srcreg, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
new file mode 100644
index 0000000..351a607
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
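+
+/*
+ * For example (as the handlers below do), the method and the ShadowFrame itself can be
+ * recovered from xFP via these offsets:
+ *   ldr  x0, [xFP, #OFF_FP_METHOD]        // load the shadow frame's method_ field
+ *   add  x1, xFP, #OFF_FP_SHADOWFRAME     // rewind xFP to the base of the ShadowFrame
+ */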
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+/*
+ * Fetch the next instruction from xPC into wINST.  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!
+.endm
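+
+/*
+ * Typical use in a two-code-unit handler (see the opcode handlers below):
+ *   FETCH_ADVANCE_INST 2     // xPC advances by 2 code units (4 bytes), wINST<- next code unit
+ */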
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and xINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * xINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]
+.endm
+
+/* Advance xPC by some number of code units. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg, advancing xPC to point to
+ * the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
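+
+/*
+ * Example (as used by the array opcodes below): for an "op vAA, vBB, vCC" instruction,
+ *   FETCH_B w2, 1, 0         // w2<- BB
+ *   FETCH_B w3, 1, 1         // w3<- CC
+ */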
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Clobbers _reg.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
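+
+/*
+ * The standard dispatch idiom pairs this with GET_INST_OPCODE:
+ *   GET_INST_OPCODE ip       // ip<- opcode of the instruction in wINST
+ *   GOTO_OPCODE ip           // branch to xIBASE + opcode * handler size
+ */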
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]
+.endm
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2
+    str     xzr, [ip2]
+.endm
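+
+/*
+ * Example: GET_VREG_WIDE x1, w2 loads the 64-bit value of vBB (as in op_cmp_long below),
+ * and SET_VREG_WIDE x2, w4 stores a 64-bit result into vAA while zeroing the matching
+ * reference slots (as in op_aget_wide).
+ */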
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
new file mode 100644
index 0000000..ff1974c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/invoke.S
@@ -0,0 +1,19 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      $helper
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
diff --git a/runtime/interpreter/mterp/arm64/op_add_double.S b/runtime/interpreter/mterp/arm64/op_add_double.S
new file mode 100644
index 0000000..8509f70
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fadd d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_double_2addr.S b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S
new file mode 100644
index 0000000..61fd58f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fadd     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_float.S b/runtime/interpreter/mterp/arm64/op_add_float.S
new file mode 100644
index 0000000..7d09fef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fadd   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_float_2addr.S b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S
new file mode 100644
index 0000000..7b378e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fadd   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int.S b/runtime/interpreter/mterp/arm64/op_add_int.S
new file mode 100644
index 0000000..6eadb54
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_2addr.S b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S
new file mode 100644
index 0000000..d35bc8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit16.S b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S
new file mode 100644
index 0000000..4930ad7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
new file mode 100644
index 0000000..196ea99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"add     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_long.S b/runtime/interpreter/mterp/arm64/op_add_long.S
new file mode 100644
index 0000000..bc334aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"add x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_add_long_2addr.S b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S
new file mode 100644
index 0000000..5e5dbce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"add     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_aget.S b/runtime/interpreter/mterp/arm64/op_aget.S
new file mode 100644
index 0000000..662c9cc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget.S
@@ -0,0 +1,28 @@
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #$shift    // x0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $load   w2, [x0, #$data_offset]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aget_boolean.S b/runtime/interpreter/mterp/arm64/op_aget_boolean.S
new file mode 100644
index 0000000..6ab6cc1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_byte.S b/runtime/interpreter/mterp/arm64/op_aget_byte.S
new file mode 100644
index 0000000..c7f5b23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_char.S b/runtime/interpreter/mterp/arm64/op_aget_char.S
new file mode 100644
index 0000000..9fddf17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_object.S b/runtime/interpreter/mterp/arm64/op_aget_object.S
new file mode 100644
index 0000000..1bbe3e8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_object.S
@@ -0,0 +1,20 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     w1, MterpException
+    SET_VREG_OBJECT w0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aget_short.S b/runtime/interpreter/mterp/arm64/op_aget_short.S
new file mode 100644
index 0000000..39554de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aget_wide.S b/runtime/interpreter/mterp/arm64/op_aget_wide.S
new file mode 100644
index 0000000..6f990ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null array object
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // x0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_and_int.S b/runtime/interpreter/mterp/arm64/op_and_int.S
new file mode 100644
index 0000000..31f3f73
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_2addr.S b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S
new file mode 100644
index 0000000..e59632c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit16.S b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S
new file mode 100644
index 0000000..6540f81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
new file mode 100644
index 0000000..167b40e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"and     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_long.S b/runtime/interpreter/mterp/arm64/op_and_long.S
new file mode 100644
index 0000000..ede047d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"and x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_and_long_2addr.S b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S
new file mode 100644
index 0000000..d62ccef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"and     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_aput.S b/runtime/interpreter/mterp/arm64/op_aput.S
new file mode 100644
index 0000000..175b483
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput.S
@@ -0,0 +1,28 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #$shift     // x0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    $store  w2, [x0, #$data_offset]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aput_boolean.S b/runtime/interpreter/mterp/arm64/op_aput_boolean.S
new file mode 100644
index 0000000..5e7a86f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_byte.S b/runtime/interpreter/mterp/arm64/op_aput_byte.S
new file mode 100644
index 0000000..d659ebc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_char.S b/runtime/interpreter/mterp/arm64/op_aput_char.S
new file mode 100644
index 0000000..7547c80
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_object.S b/runtime/interpreter/mterp/arm64/op_aput_object.S
new file mode 100644
index 0000000..0146fdc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_object.S
@@ -0,0 +1,13 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpAputObject
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_aput_short.S b/runtime/interpreter/mterp/arm64/op_aput_short.S
new file mode 100644
index 0000000..8631e28
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm64/op_aput_wide.S b/runtime/interpreter/mterp/arm64/op_aput_wide.S
new file mode 100644
index 0000000..e1cf9c1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // x0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    GET_VREG_WIDE x1, w4
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_array_length.S b/runtime/interpreter/mterp/arm64/op_array_length.S
new file mode 100644
index 0000000..0cce917
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_array_length.S
@@ -0,0 +1,12 @@
+    /*
+     * Return the length of an array.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // yup, fail
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w3, w2                     // vB<- length
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_check_cast.S b/runtime/interpreter/mterp/arm64/op_check_cast.S
new file mode 100644
index 0000000..cb9f606
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_check_cast.S
@@ -0,0 +1,16 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // x1<- &object
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // x2<- method
+    mov      x3, xSELF                  // x3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
new file mode 100644
index 0000000..982e5b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S
@@ -0,0 +1,13 @@
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2
+    GET_VREG_WIDE x2, w3
+    cmp     x1, x2
+    csinc   w0, wzr, wzr, eq            // w0<- (x1 == x2) ? 0 : 1
+    csneg   w0, w0, w0, ge              // w0<- (x1 >= x2) ? w0 : -w0, i.e. -1/0/1
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG w0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
new file mode 100644
index 0000000..14f9ff8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
new file mode 100644
index 0000000..3a20cba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
new file mode 100644
index 0000000..06d5917
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
new file mode 100644
index 0000000..d87d086
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"}
diff --git a/runtime/interpreter/mterp/arm64/op_const.S b/runtime/interpreter/mterp/arm64/op_const.S
new file mode 100644
index 0000000..031ede1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
new file mode 100644
index 0000000..27f5273
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
new file mode 100644
index 0000000..04cd4f8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S
@@ -0,0 +1,8 @@
+    /* const/4 vA, #+B */
+    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  // ip<- opcode from xINST
+    SET_VREG w1, w0                     // fp[A]<- w1
+    GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_class.S b/runtime/interpreter/mterp/arm64/op_const_class.S
new file mode 100644
index 0000000..971cfa0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_class.S
@@ -0,0 +1,12 @@
+    /* const/class vAA, Class//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cbnz    w0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
new file mode 100644
index 0000000..dd51ce1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH   w0, 1                       // w0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    lsl     w0, w0, #16                 // w0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_string.S b/runtime/interpreter/mterp/arm64/op_const_string.S
new file mode 100644
index 0000000..896f1e7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string.S
@@ -0,0 +1,12 @@
+    /* const/string vAA, String//BBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load rINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S
new file mode 100644
index 0000000..e1a7339
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S
@@ -0,0 +1,14 @@
+    /* const/string vAA, String//BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // advance rPC
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide.S b/runtime/interpreter/mterp/arm64/op_const_wide.S
new file mode 100644
index 0000000..8f57dda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide.S
@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 5                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // x0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // x0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
new file mode 100644
index 0000000..e43628b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -0,0 +1,8 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sbfm    x0, x0, 0, 31               // x0<- sign-extend w0 to 64 bits
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
new file mode 100644
index 0000000..527f7d8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -0,0 +1,10 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    sbfm    x0, x0, 0, 31               // x0<- sign-extend BBBBbbbb to 64 bits
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_high16.S b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S
new file mode 100644
index 0000000..94ab987
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S
@@ -0,0 +1,8 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    lsl     x0, x0, #48
+    SET_VREG_WIDE x0, w1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_div_double.S b/runtime/interpreter/mterp/arm64/op_div_double.S
new file mode 100644
index 0000000..1f7dad0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fdiv d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_double_2addr.S b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S
new file mode 100644
index 0000000..414a175
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fdiv     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_float.S b/runtime/interpreter/mterp/arm64/op_div_float.S
new file mode 100644
index 0000000..f24a26c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fdiv   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_float_2addr.S b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S
new file mode 100644
index 0000000..2888049
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fdiv   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int.S b/runtime/interpreter/mterp/arm64/op_div_int.S
new file mode 100644
index 0000000..88371c0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_2addr.S b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S
new file mode 100644
index 0000000..5f5a80f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit16.S b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S
new file mode 100644
index 0000000..dc7a484
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"sdiv w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit8.S b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S
new file mode 100644
index 0000000..c06521c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_long.S b/runtime/interpreter/mterp/arm64/op_div_long.S
new file mode 100644
index 0000000..820ae3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sdiv x0, x1, x2", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_div_long_2addr.S b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S
new file mode 100644
index 0000000..da7eabd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sdiv     x0, x0, x1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_float.S b/runtime/interpreter/mterp/arm64/op_double_to_float.S
new file mode 100644
index 0000000..c1555fd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvt s0, d0", "srcreg":"d0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_int.S b/runtime/interpreter/mterp/arm64/op_double_to_int.S
new file mode 100644
index 0000000..7244bac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_int.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvtzs w0, d0", "srcreg":"d0", "tgtreg":"w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_double_to_long.S b/runtime/interpreter/mterp/arm64/op_double_to_long.S
new file mode 100644
index 0000000..741160b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_long.S
@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"fcvtzs x0, d0", "srcreg":"d0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
new file mode 100644
index 0000000..f50d9e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
@@ -0,0 +1,13 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, w1, lsl #1         // x1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // exception?
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array.S b/runtime/interpreter/mterp/arm64/op_filled_new_array.S
new file mode 100644
index 0000000..806a1b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array.S
@@ -0,0 +1,18 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      $helper
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S
new file mode 100644
index 0000000..3c9a419
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "arm64/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_double.S b/runtime/interpreter/mterp/arm64/op_float_to_double.S
new file mode 100644
index 0000000..892feca
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvt  d0, s0", "srcreg":"s0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_int.S b/runtime/interpreter/mterp/arm64/op_float_to_int.S
new file mode 100644
index 0000000..c849d81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_int.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"fcvtzs w0, s0", "srcreg":"s0", "tgtreg":"w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_float_to_long.S b/runtime/interpreter/mterp/arm64/op_float_to_long.S
new file mode 100644
index 0000000..c3de16f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_long.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvtzs x0, s0", "srcreg":"s0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
new file mode 100644
index 0000000..db98a45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -0,0 +1,28 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
+    add     w2, w1, w1                  // w2<- byte offset, set flags
+       // If backwards branch refresh rIBASE
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
+    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+       // If backwards branch refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
new file mode 100644
index 0000000..ff66a23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -0,0 +1,23 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
new file mode 100644
index 0000000..8a6980e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -0,0 +1,32 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
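The "<= 0" test the comment motivates can be stated directly; a hedged C++ sketch of that decision (names are illustrative):

    #include <cstdint>

    // Sketch: goto/32 may branch to itself, so a zero offset must also take
    // the suspend-check path, which is why the handler uses "b.le" rather
    // than "b.mi".
    inline bool TakesSuspendCheckPath(int32_t code_unit_offset) {
      return code_unit_offset <= 0;
    }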
diff --git a/runtime/interpreter/mterp/arm64/op_if_eq.S b/runtime/interpreter/mterp/arm64/op_if_eq.S
new file mode 100644
index 0000000..aa4a0f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eq.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
new file mode 100644
index 0000000..1d3202e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ge.S b/runtime/interpreter/mterp/arm64/op_if_ge.S
new file mode 100644
index 0000000..d6ec761
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ge.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
new file mode 100644
index 0000000..8e3abd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gt.S b/runtime/interpreter/mterp/arm64/op_if_gt.S
new file mode 100644
index 0000000..7db8e9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gt.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
new file mode 100644
index 0000000..a4f2f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_le.S b/runtime/interpreter/mterp/arm64/op_if_le.S
new file mode 100644
index 0000000..ca3a83f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_le.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
new file mode 100644
index 0000000..c1425fdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lt.S b/runtime/interpreter/mterp/arm64/op_if_lt.S
new file mode 100644
index 0000000..56450a1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lt.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
new file mode 100644
index 0000000..03cd3d6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ne.S b/runtime/interpreter/mterp/arm64/op_if_ne.S
new file mode 100644
index 0000000..14d9e13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ne.S
@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
new file mode 100644
index 0000000..21e1bc2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S
@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
new file mode 100644
index 0000000..165c730
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -0,0 +1,25 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       $helper
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if $is_object
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean.S b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
new file mode 100644
index 0000000..36a9b6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode", "extend":"uxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S
new file mode 100644
index 0000000..2ceccb9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte.S b/runtime/interpreter/mterp/arm64/op_iget_byte.S
new file mode 100644
index 0000000..fd3f164
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetByteInstanceFromCode", "extend":"sxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S
new file mode 100644
index 0000000..6e97b72
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_char.S b/runtime/interpreter/mterp/arm64/op_iget_char.S
new file mode 100644
index 0000000..ea23275
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetCharInstanceFromCode", "extend":"uxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_char_quick.S b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S
new file mode 100644
index 0000000..325dd1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_object.S b/runtime/interpreter/mterp/arm64/op_iget_object.S
new file mode 100644
index 0000000..03be78d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_object_quick.S b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S
new file mode 100644
index 0000000..e9a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S
@@ -0,0 +1,15 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    PREFETCH_INST 2
+    cbnz    x3, MterpPossibleException      // bail out
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
new file mode 100644
index 0000000..45c68a3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -0,0 +1,15 @@
+%default { "load":"ldr", "extend":"" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    $load   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $extend
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
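The quickened form above skips field resolution and loads from the receiver at a byte offset embedded in the instruction. A hedged C++ sketch of that access; the raw pointer arithmetic is illustrative, not ART's real object layout:

    #include <cstdint>
    #include <cstring>

    // Sketch: a quickened iget loads directly from the object at a byte
    // offset taken from the instruction stream (no field lookup).
    inline int32_t LoadQuickField32(const uint8_t* obj, uint32_t byte_offset) {
      int32_t value;
      std::memcpy(&value, obj + byte_offset, sizeof(value));
      return value;
    }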
diff --git a/runtime/interpreter/mterp/arm64/op_iget_short.S b/runtime/interpreter/mterp/arm64/op_iget_short.S
new file mode 100644
index 0000000..c347542
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetShortInstanceFromCode", "extend":"sxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_short_quick.S b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S
new file mode 100644
index 0000000..8367070
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide.S b/runtime/interpreter/mterp/arm64/op_iget_wide.S
new file mode 100644
index 0000000..9718390
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cmp      x3, #0
+    cbnz     x3, MterpException            // bail out
+    SET_VREG_WIDE x0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
new file mode 100644
index 0000000..2480d2d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -0,0 +1,12 @@
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject        // object was null
+    add     x4, x3, x4                  // create direct pointer
+    ldr     x0, [x4]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
new file mode 100644
index 0000000..647bc75
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -0,0 +1,23 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // w2<- method
+    mov       x3, xSELF                 // w3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr       w2, wINST, #8             // w2<- A+
+    and       w2, w2, #15               // w2<- A
+    PREFETCH_INST 2
+    cbnz      x1, MterpException
+    ADVANCE 2                           // advance rPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_byte.S b/runtime/interpreter/mterp/arm64/op_int_to_byte.S
new file mode 100644
index 0000000..43f8148
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxtb    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_char.S b/runtime/interpreter/mterp/arm64/op_int_to_char.S
new file mode 100644
index 0000000..f092170
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_char.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"uxth    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_double.S b/runtime/interpreter/mterp/arm64/op_int_to_double.S
new file mode 100644
index 0000000..3dee75a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"scvtf d0, w0", "srcreg":"w0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_float.S b/runtime/interpreter/mterp/arm64/op_int_to_float.S
new file mode 100644
index 0000000..3ebbdc7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"scvtf s0, w0", "srcreg":"w0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
new file mode 100644
index 0000000..13d2120
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_short.S b/runtime/interpreter/mterp/arm64/op_int_to_short.S
new file mode 100644
index 0000000..87fb804
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_short.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxth    w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct.S b/runtime/interpreter/mterp/arm64/op_invoke_direct.S
new file mode 100644
index 0000000..c117232
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S
new file mode 100644
index 0000000..efc54c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface.S b/runtime/interpreter/mterp/arm64/op_invoke_interface.S
new file mode 100644
index 0000000..12dfa59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S
new file mode 100644
index 0000000..61caaf4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static.S b/runtime/interpreter/mterp/arm64/op_invoke_static.S
new file mode 100644
index 0000000..634eda2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static_range.S b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S
new file mode 100644
index 0000000..32cdcdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super.S b/runtime/interpreter/mterp/arm64/op_invoke_super.S
new file mode 100644
index 0000000..def2c55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super_range.S b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S
new file mode 100644
index 0000000..27fb859
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S
new file mode 100644
index 0000000..66d0502
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..4300c34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S
new file mode 100644
index 0000000..b43955c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..90c7b65
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput.S b/runtime/interpreter/mterp/arm64/op_iput.S
new file mode 100644
index 0000000..a8c0e61
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput.S
@@ -0,0 +1,21 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean.S b/runtime/interpreter/mterp/arm64/op_iput_boolean.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte.S b/runtime/interpreter/mterp/arm64/op_iput_byte.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_char.S b/runtime/interpreter/mterp/arm64/op_iput_char.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_char_quick.S b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_object.S b/runtime/interpreter/mterp/arm64/op_iput_object.S
new file mode 100644
index 0000000..37a649b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpIputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_object_quick.S b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S
new file mode 100644
index 0000000..6fbf2b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S
@@ -0,0 +1,9 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpIputObjectQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
new file mode 100644
index 0000000..2afc51b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $store     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_short.S b/runtime/interpreter/mterp/arm64/op_iput_short.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_short_quick.S b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide.S b/runtime/interpreter/mterp/arm64/op_iput_wide.S
new file mode 100644
index 0000000..4ce9525
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide.S
@@ -0,0 +1,15 @@
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
new file mode 100644
index 0000000..27b5dc5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cmp     w2, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    add     x1, x2, x3                  // create a direct pointer
+    str     x0, [x1]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_double.S b/runtime/interpreter/mterp/arm64/op_long_to_double.S
new file mode 100644
index 0000000..a3f59c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_double.S
@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"scvtf d0, x0", "srcreg":"x0", "tgtreg":"d0"}
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_float.S b/runtime/interpreter/mterp/arm64/op_long_to_float.S
new file mode 100644
index 0000000..e9c9145
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_float.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"scvtf s0, x0", "srcreg":"x0", "tgtreg":"s0"}
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
new file mode 100644
index 0000000..360a69b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S
@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_monitor_enter.S b/runtime/interpreter/mterp/arm64/op_monitor_enter.S
new file mode 100644
index 0000000..6fbd9ae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_enter.S
@@ -0,0 +1,13 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // w1<- self
+    bl       artLockObjectFromCode
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   // extract opcode from rINST
+    GOTO_OPCODE ip                       // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_monitor_exit.S b/runtime/interpreter/mterp/arm64/op_monitor_exit.S
new file mode 100644
index 0000000..26e2d8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_exit.S
@@ -0,0 +1,17 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // w1<- self
+    bl       artUnlockObjectFromCode    // w0<- success for unlock(self, obj)
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1                // before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move.S b/runtime/interpreter/mterp/arm64/op_move.S
new file mode 100644
index 0000000..195b7eb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_16.S b/runtime/interpreter/mterp/arm64/op_move_16.S
new file mode 100644
index 0000000..5146e3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_exception.S b/runtime/interpreter/mterp/arm64/op_move_exception.S
new file mode 100644
index 0000000..b29298f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_exception.S
@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     x1, #0                      // w1<- 0
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_from16.S b/runtime/interpreter/mterp/arm64/op_move_from16.S
new file mode 100644
index 0000000..78f344d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // r1<- BBBB
+    lsr     w0, wINST, #8               // r0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- r2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_object.S b/runtime/interpreter/mterp/arm64/op_move_object.S
new file mode 100644
index 0000000..a5adc59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object.S
@@ -0,0 +1 @@
+%include "arm64/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_object_16.S b/runtime/interpreter/mterp/arm64/op_move_object_16.S
new file mode 100644
index 0000000..ef86c45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_16.S
@@ -0,0 +1 @@
+%include "arm64/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_object_from16.S b/runtime/interpreter/mterp/arm64/op_move_object_from16.S
new file mode 100644
index 0000000..0c73b3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "arm64/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_result.S b/runtime/interpreter/mterp/arm64/op_move_result.S
new file mode 100644
index 0000000..06fe962
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JType.
+    ldr     w0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- r0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
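The handler above reads through the frame's result-register pointer, which the preceding invoke filled in. A small C++ sketch of that indirection (assumed slot width, illustrative name):

    #include <cstdint>

    // Sketch: OFF_FP_RESULT_REGISTER holds a pointer to a 64-bit result slot
    // written by the preceding invoke; move-result copies its low 32 bits
    // into vAA (move-result-wide takes all 64).
    inline uint32_t ReadResult32(const uint64_t* result_register) {
      return static_cast<uint32_t>(*result_register);
    }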
diff --git a/runtime/interpreter/mterp/arm64/op_move_result_object.S b/runtime/interpreter/mterp/arm64/op_move_result_object.S
new file mode 100644
index 0000000..da2bbee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_object.S
@@ -0,0 +1 @@
+%include "arm64/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_move_result_wide.S b/runtime/interpreter/mterp/arm64/op_move_result_wide.S
new file mode 100644
index 0000000..f90a33f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_wide.S
@@ -0,0 +1,9 @@
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JType.
+    ldr     x0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, x2                // fp[AA]<- r0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide.S b/runtime/interpreter/mterp/arm64/op_move_wide.S
new file mode 100644
index 0000000..538f079
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide.S
@@ -0,0 +1,9 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
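A C++ sketch of what the NOTE above implies: because vA and vB may overlap, the full 64-bit source must be read before either destination register is written. The flat vreg array here is an assumption for illustration only.

    #include <cstdint>
    #include <cstring>

    // Sketch: read the whole source pair into a temporary first, then write
    // the destination pair, so "move-wide v6, v7" and "move-wide v7, v6"
    // both behave correctly.
    inline void MoveWide(uint32_t* vregs, uint32_t dst, uint32_t src) {
      uint64_t tmp;
      std::memcpy(&tmp, &vregs[src], sizeof(tmp));   // read vB/vB+1 first
      std::memcpy(&vregs[dst], &tmp, sizeof(tmp));   // then write vA/vA+1
    }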
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_16.S b/runtime/interpreter/mterp/arm64/op_move_wide_16.S
new file mode 100644
index 0000000..c79cdc50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_16.S
@@ -0,0 +1,9 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    SET_VREG_WIDE x3, w2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_from16.S b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S
new file mode 100644
index 0000000..70dbe99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S
@@ -0,0 +1,9 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_mul_double.S b/runtime/interpreter/mterp/arm64/op_mul_double.S
new file mode 100644
index 0000000..8d35b81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fmul d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S
new file mode 100644
index 0000000..526cb3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fmul     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_float.S b/runtime/interpreter/mterp/arm64/op_mul_float.S
new file mode 100644
index 0000000..eea7733
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fmul   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S
new file mode 100644
index 0000000..c1f2376
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fmul   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int.S b/runtime/interpreter/mterp/arm64/op_mul_int.S
new file mode 100644
index 0000000..d14cae1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int.S
@@ -0,0 +1,2 @@
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+%include "arm64/binop.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S
new file mode 100644
index 0000000..f079118
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S
@@ -0,0 +1,2 @@
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+%include "arm64/binop2addr.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S
new file mode 100644
index 0000000..a378559
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S
@@ -0,0 +1,2 @@
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+%include "arm64/binopLit16.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S
new file mode 100644
index 0000000..b3d4014
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S
@@ -0,0 +1,2 @@
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+%include "arm64/binopLit8.S" {"instr":"mul     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_long.S b/runtime/interpreter/mterp/arm64/op_mul_long.S
new file mode 100644
index 0000000..bc0dcbd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"mul x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S
new file mode 100644
index 0000000..fa1cdf8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"mul     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
new file mode 100644
index 0000000..e9064c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
new file mode 100644
index 0000000..49d51af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add     w0, w0, w4"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_int.S b/runtime/interpreter/mterp/arm64/op_neg_int.S
new file mode 100644
index 0000000..59c14a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_int.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sub     w0, wzr, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_long.S b/runtime/interpreter/mterp/arm64/op_neg_long.S
new file mode 100644
index 0000000..0c71ea7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_long.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"sub x0, xzr, x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_new_array.S b/runtime/interpreter/mterp/arm64/op_new_array.S
new file mode 100644
index 0000000..886120a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_array.S
@@ -0,0 +1,18 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_new_instance.S b/runtime/interpreter/mterp/arm64/op_new_instance.S
new file mode 100644
index 0000000..c171ac5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_instance.S
@@ -0,0 +1,13 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xSELF
+    mov     w2, wINST
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2               // advance rPC, load rINST
+    GET_INST_OPCODE ip                 // extract opcode from rINST
+    GOTO_OPCODE ip                     // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_nop.S b/runtime/interpreter/mterp/arm64/op_nop.S
new file mode 100644
index 0000000..80c2d45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                // advance to next instr, load rINST
+    GET_INST_OPCODE ip                  // ip<- opcode from rINST
+    GOTO_OPCODE ip                      // execute it
diff --git a/runtime/interpreter/mterp/arm64/op_not_int.S b/runtime/interpreter/mterp/arm64/op_not_int.S
new file mode 100644
index 0000000..55d7750
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_int.S
@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"mvn     w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_not_long.S b/runtime/interpreter/mterp/arm64/op_not_long.S
new file mode 100644
index 0000000..e5ebdd6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_long.S
@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"mvn     x0, x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int.S b/runtime/interpreter/mterp/arm64/op_or_int.S
new file mode 100644
index 0000000..648c1e6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_2addr.S b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S
new file mode 100644
index 0000000..abdf599
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit16.S b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S
new file mode 100644
index 0000000..db7f4ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
new file mode 100644
index 0000000..51675f8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"orr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_long.S b/runtime/interpreter/mterp/arm64/op_or_long.S
new file mode 100644
index 0000000..dd137ce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"orr x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_or_long_2addr.S b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S
new file mode 100644
index 0000000..f785230
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"orr     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
new file mode 100644
index 0000000..f087d23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -0,0 +1,39 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      $func                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      $func                       // w0<- code-unit branch offset
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
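The helper the handler calls resolves the switch target from the Dex packed-switch payload (ushort ident, ushort size, int first_key, int targets[size]). A hedged C++ sketch of that lookup; this is not the actual MterpDoPackedSwitch, just the idea:

    #include <cstdint>
    #include <cstring>

    // Sketch: returns a signed code-unit offset relative to the switch
    // opcode, or 3 (the instruction's own width) when no case matches.
    int32_t PackedSwitchSketch(const uint16_t* payload, int32_t test_val) {
      const uint16_t size = payload[1];
      int32_t first_key;
      std::memcpy(&first_key, &payload[2], sizeof(first_key));
      const int32_t index = test_val - first_key;
      if (index < 0 || index >= size) {
        return 3;  // fall through to the next instruction
      }
      int32_t target;
      std::memcpy(&target, &payload[4 + 2 * index], sizeof(target));
      return target;
    }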
diff --git a/runtime/interpreter/mterp/arm64/op_rem_double.S b/runtime/interpreter/mterp/arm64/op_rem_double.S
new file mode 100644
index 0000000..c631ddb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double.S
@@ -0,0 +1,13 @@
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S
new file mode 100644
index 0000000..db18aa7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S
@@ -0,0 +1,12 @@
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    bl fmod
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float.S b/runtime/interpreter/mterp/arm64/op_rem_float.S
new file mode 100644
index 0000000..73f7060
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm64/fbinop.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
new file mode 100644
index 0000000..0b91891
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -0,0 +1,13 @@
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    bl  fmodf
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int.S b/runtime/interpreter/mterp/arm64/op_rem_int.S
new file mode 100644
index 0000000..dd9dfda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S
new file mode 100644
index 0000000..57fc4971
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S
new file mode 100644
index 0000000..b51a739
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S
new file mode 100644
index 0000000..03ea324
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_long.S b/runtime/interpreter/mterp/arm64/op_rem_long.S
new file mode 100644
index 0000000..f133f86
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"preinstr":"sdiv x3, x1, x2","instr":"msub x0, x3, x2, x1", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S
new file mode 100644
index 0000000..b45e2a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"preinstr":"sdiv x3, x0, x1", "instr":"msub x0, x3, x1, x0", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm64/op_return.S b/runtime/interpreter/mterp/arm64/op_return.S
new file mode 100644
index 0000000..28630ee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return.S
@@ -0,0 +1,19 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // r2<- AA
+    GET_VREG w0, w2                     // r0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_return_object.S b/runtime/interpreter/mterp/arm64/op_return_object.S
new file mode 100644
index 0000000..b6cb532
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_object.S
@@ -0,0 +1 @@
+%include "arm64/op_return.S"
diff --git a/runtime/interpreter/mterp/arm64/op_return_void.S b/runtime/interpreter/mterp/arm64/op_return_void.S
new file mode 100644
index 0000000..3a5aa56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void.S
@@ -0,0 +1,12 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    mov     x0, #0
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
new file mode 100644
index 0000000..1e06953
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
@@ -0,0 +1,10 @@
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    mov     x0, #0
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_return_wide.S b/runtime/interpreter/mterp/arm64/op_return_wide.S
new file mode 100644
index 0000000..c6e1d9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_wide.S
@@ -0,0 +1,18 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return
diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int.S b/runtime/interpreter/mterp/arm64/op_rsub_int.S
new file mode 100644
index 0000000..3bf45fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm64/binopLit16.S" {"instr":"sub     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S
new file mode 100644
index 0000000..7a3572b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sub     w0, w1, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget.S b/runtime/interpreter/mterp/arm64/op_sget.S
new file mode 100644
index 0000000..6352ce0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget.S
@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    $helper
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    $extend
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if $is_object
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
diff --git a/runtime/interpreter/mterp/arm64/op_sget_boolean.S b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
new file mode 100644
index 0000000..c40dbdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"uxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_byte.S b/runtime/interpreter/mterp/arm64/op_sget_byte.S
new file mode 100644
index 0000000..6cf69a3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"sxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_char.S b/runtime/interpreter/mterp/arm64/op_sget_char.S
new file mode 100644
index 0000000..8924a34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_char.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"uxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_object.S b/runtime/interpreter/mterp/arm64/op_sget_object.S
new file mode 100644
index 0000000..620b0ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_object.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_short.S b/runtime/interpreter/mterp/arm64/op_sget_short.S
new file mode 100644
index 0000000..19dbba6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_short.S
@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"sxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_wide.S b/runtime/interpreter/mterp/arm64/op_sget_wide.S
new file mode 100644
index 0000000..287f66d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_wide.S
@@ -0,0 +1,19 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
new file mode 100644
index 0000000..bd0f237
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
new file mode 100644
index 0000000..b4671d2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
new file mode 100644
index 0000000..4dd32e0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_long.S b/runtime/interpreter/mterp/arm64/op_shl_long.S
new file mode 100644
index 0000000..bbf9600
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsl"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S
new file mode 100644
index 0000000..a5c4013
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsl"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
new file mode 100644
index 0000000..c214a18
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
new file mode 100644
index 0000000..3c1484b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
new file mode 100644
index 0000000..26d5024
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_long.S b/runtime/interpreter/mterp/arm64/op_shr_long.S
new file mode 100644
index 0000000..4d33235
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"asr"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S
new file mode 100644
index 0000000..0a4a386
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"asr"}
diff --git a/runtime/interpreter/mterp/arm64/op_sparse_switch.S b/runtime/interpreter/mterp/arm64/op_sparse_switch.S
new file mode 100644
index 0000000..5a8d748
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "arm64/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/arm64/op_sput.S b/runtime/interpreter/mterp/arm64/op_sput.S
new file mode 100644
index 0000000..75f27ab
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput.S
@@ -0,0 +1,19 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      $helper
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_sput_boolean.S b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_byte.S b/runtime/interpreter/mterp/arm64/op_sput_byte.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_byte.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_char.S b/runtime/interpreter/mterp/arm64/op_sput_char.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_char.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_object.S b/runtime/interpreter/mterp/arm64/op_sput_object.S
new file mode 100644
index 0000000..c176da2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_object.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xINST
+    mov     x3, xSELF
+    bl      MterpSputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_sput_short.S b/runtime/interpreter/mterp/arm64/op_sput_short.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_short.S
@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
new file mode 100644
index 0000000..1d034ec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -0,0 +1,18 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]
+    lsr     w2, wINST, #8               // w2<- AA
+    add     x2, xFP, w2, lsl #2
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_sub_double.S b/runtime/interpreter/mterp/arm64/op_sub_double.S
new file mode 100644
index 0000000..e8e3401
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fsub d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S
new file mode 100644
index 0000000..ddab55e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fsub     d0, d0, d1", "r0":"d0", "r1":"d1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_float.S b/runtime/interpreter/mterp/arm64/op_sub_float.S
new file mode 100644
index 0000000..227b15f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float.S
@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fsub   s0, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S
new file mode 100644
index 0000000..19ac8d5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fsub   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_int.S b/runtime/interpreter/mterp/arm64/op_sub_int.S
new file mode 100644
index 0000000..0e7ce0e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sub     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S
new file mode 100644
index 0000000..d2c1bd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sub     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_long.S b/runtime/interpreter/mterp/arm64/op_sub_long.S
new file mode 100644
index 0000000..263c70d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sub x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S
new file mode 100644
index 0000000..5be3772
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sub     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/op_throw.S b/runtime/interpreter/mterp/arm64/op_throw.S
new file mode 100644
index 0000000..9a951af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_throw.S
@@ -0,0 +1,10 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w1, w2                      // w1<- vAA (exception object)
+    cbz      w1, common_errNullObject
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException
diff --git a/runtime/interpreter/mterp/arm64/op_unused_3e.S b/runtime/interpreter/mterp/arm64/op_unused_3e.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3e.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_3f.S b/runtime/interpreter/mterp/arm64/op_unused_3f.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3f.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_40.S b/runtime/interpreter/mterp/arm64/op_unused_40.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_40.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_41.S b/runtime/interpreter/mterp/arm64/op_unused_41.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_41.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_42.S b/runtime/interpreter/mterp/arm64/op_unused_42.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_42.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_43.S b/runtime/interpreter/mterp/arm64/op_unused_43.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_43.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_73.S b/runtime/interpreter/mterp/arm64/op_unused_73.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_73.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_79.S b/runtime/interpreter/mterp/arm64/op_unused_79.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_79.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_7a.S b/runtime/interpreter/mterp/arm64/op_unused_7a.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_7a.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f3.S b/runtime/interpreter/mterp/arm64/op_unused_f3.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f3.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f4.S b/runtime/interpreter/mterp/arm64/op_unused_f4.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f4.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f5.S b/runtime/interpreter/mterp/arm64/op_unused_f5.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f5.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f6.S b/runtime/interpreter/mterp/arm64/op_unused_f6.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f6.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f7.S b/runtime/interpreter/mterp/arm64/op_unused_f7.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f7.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f8.S b/runtime/interpreter/mterp/arm64/op_unused_f8.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f8.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_f9.S b/runtime/interpreter/mterp/arm64/op_unused_f9.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f9.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fa.S b/runtime/interpreter/mterp/arm64/op_unused_fa.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fa.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fb.S b/runtime/interpreter/mterp/arm64/op_unused_fb.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fb.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fc.S b/runtime/interpreter/mterp/arm64/op_unused_fc.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fc.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fd.S b/runtime/interpreter/mterp/arm64/op_unused_fd.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fd.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fe.S b/runtime/interpreter/mterp/arm64/op_unused_fe.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fe.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_ff.S b/runtime/interpreter/mterp/arm64/op_unused_ff.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_ff.S
@@ -0,0 +1 @@
+%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
new file mode 100644
index 0000000..bb8382b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
new file mode 100644
index 0000000..dbccb99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
new file mode 100644
index 0000000..35090c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long.S b/runtime/interpreter/mterp/arm64/op_ushr_long.S
new file mode 100644
index 0000000..e13c86a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsr"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S
new file mode 100644
index 0000000..67ec91e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsr"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int.S b/runtime/interpreter/mterp/arm64/op_xor_int.S
new file mode 100644
index 0000000..7483663
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int.S
@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S
new file mode 100644
index 0000000..2f9a2c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S
new file mode 100644
index 0000000..6b72c56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
new file mode 100644
index 0000000..6d187b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"eor     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_long.S b/runtime/interpreter/mterp/arm64/op_xor_long.S
new file mode 100644
index 0000000..3880d5d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long.S
@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"eor x0, x1, x2"}
diff --git a/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S
new file mode 100644
index 0000000..3690552
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"eor     x0, x0, x1"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
new file mode 100644
index 0000000..6306fca
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -0,0 +1,20 @@
+%default {"opcode":"shl"}
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // Mask low 6
+    $opcode  x0, x1, x2                 // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
new file mode 100644
index 0000000..77d104a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -0,0 +1,16 @@
+%default {"opcode":"lsl"}
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     x1, x1, #63                 // Mask low 6 bits.
+    $opcode x0, x0, x1
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
new file mode 100644
index 0000000..474a961
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unop.S
@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    $preinstr                           // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $instr                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
new file mode 100644
index 0000000..109302a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unopWide.S
@@ -0,0 +1,18 @@
+%default {"instr":"sub x0, xzr, x0", "preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $preinstr
+    $instr
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/arm64/unused.S b/runtime/interpreter/mterp/arm64/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unused.S
@@ -0,0 +1,4 @@
+/*
+ * Bail out to the reference interpreter, which will throw.
+ */
+  b MterpFallback
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
new file mode 100644
index 0000000..d4856d2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -0,0 +1,33 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g.
+     * for "if-lez" you would use "le".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, ${condition}    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index ef3c721..57206d2 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -1,3 +1,4 @@
+
 # Copyright (C) 2015 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,262 +37,262 @@
     # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
     # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
 
-    op op_nop FALLBACK
-    op op_move FALLBACK
-    op op_move_from16 FALLBACK
-    op op_move_16 FALLBACK
-    op op_move_wide FALLBACK
-    op op_move_wide_from16 FALLBACK
-    op op_move_wide_16 FALLBACK
-    op op_move_object FALLBACK
-    op op_move_object_from16 FALLBACK
-    op op_move_object_16 FALLBACK
-    op op_move_result FALLBACK
-    op op_move_result_wide FALLBACK
-    op op_move_result_object FALLBACK
-    op op_move_exception FALLBACK
-    op op_return_void FALLBACK
-    op op_return FALLBACK
-    op op_return_wide FALLBACK
-    op op_return_object FALLBACK
-    op op_const_4 FALLBACK
-    op op_const_16 FALLBACK
-    op op_const FALLBACK
-    op op_const_high16 FALLBACK
-    op op_const_wide_16 FALLBACK
-    op op_const_wide_32 FALLBACK
-    op op_const_wide FALLBACK
-    op op_const_wide_high16 FALLBACK
-    op op_const_string FALLBACK
-    op op_const_string_jumbo FALLBACK
-    op op_const_class FALLBACK
-    op op_monitor_enter FALLBACK
-    op op_monitor_exit FALLBACK
-    op op_check_cast FALLBACK
-    op op_instance_of FALLBACK
-    op op_array_length FALLBACK
-    op op_new_instance FALLBACK
-    op op_new_array FALLBACK
-    op op_filled_new_array FALLBACK
-    op op_filled_new_array_range FALLBACK
-    op op_fill_array_data FALLBACK
-    op op_throw FALLBACK
-    op op_goto FALLBACK
-    op op_goto_16 FALLBACK
-    op op_goto_32 FALLBACK
-    op op_packed_switch FALLBACK
-    op op_sparse_switch FALLBACK
-    op op_cmpl_float FALLBACK
-    op op_cmpg_float FALLBACK
-    op op_cmpl_double FALLBACK
-    op op_cmpg_double FALLBACK
-    op op_cmp_long FALLBACK
-    op op_if_eq FALLBACK
-    op op_if_ne FALLBACK
-    op op_if_lt FALLBACK
-    op op_if_ge FALLBACK
-    op op_if_gt FALLBACK
-    op op_if_le FALLBACK
-    op op_if_eqz FALLBACK
-    op op_if_nez FALLBACK
-    op op_if_ltz FALLBACK
-    op op_if_gez FALLBACK
-    op op_if_gtz FALLBACK
-    op op_if_lez FALLBACK
-    op_unused_3e FALLBACK
-    op_unused_3f FALLBACK
-    op_unused_40 FALLBACK
-    op_unused_41 FALLBACK
-    op_unused_42 FALLBACK
-    op_unused_43 FALLBACK
-    op op_aget FALLBACK
-    op op_aget_wide FALLBACK
-    op op_aget_object FALLBACK
-    op op_aget_boolean FALLBACK
-    op op_aget_byte FALLBACK
-    op op_aget_char FALLBACK
-    op op_aget_short FALLBACK
-    op op_aput FALLBACK
-    op op_aput_wide FALLBACK
-    op op_aput_object FALLBACK
-    op op_aput_boolean FALLBACK
-    op op_aput_byte FALLBACK
-    op op_aput_char FALLBACK
-    op op_aput_short FALLBACK
-    op op_iget FALLBACK
-    op op_iget_wide FALLBACK
-    op op_iget_object FALLBACK
-    op op_iget_boolean FALLBACK
-    op op_iget_byte FALLBACK
-    op op_iget_char FALLBACK
-    op op_iget_short FALLBACK
-    op op_iput FALLBACK
-    op op_iput_wide FALLBACK
-    op op_iput_object FALLBACK
-    op op_iput_boolean FALLBACK
-    op op_iput_byte FALLBACK
-    op op_iput_char FALLBACK
-    op op_iput_short FALLBACK
-    op op_sget FALLBACK
-    op op_sget_wide FALLBACK
-    op op_sget_object FALLBACK
-    op op_sget_boolean FALLBACK
-    op op_sget_byte FALLBACK
-    op op_sget_char FALLBACK
-    op op_sget_short FALLBACK
-    op op_sput FALLBACK
-    op op_sput_wide FALLBACK
-    op op_sput_object FALLBACK
-    op op_sput_boolean FALLBACK
-    op op_sput_byte FALLBACK
-    op op_sput_char FALLBACK
-    op op_sput_short FALLBACK
-    op op_invoke_virtual FALLBACK
-    op op_invoke_super FALLBACK
-    op op_invoke_direct FALLBACK
-    op op_invoke_static FALLBACK
-    op op_invoke_interface FALLBACK
-    op op_return_void_no_barrier FALLBACK
-    op op_invoke_virtual_range FALLBACK
-    op op_invoke_super_range FALLBACK
-    op op_invoke_direct_range FALLBACK
-    op op_invoke_static_range FALLBACK
-    op op_invoke_interface_range FALLBACK
-    op_unused_79 FALLBACK
-    op_unused_7a FALLBACK
-    op op_neg_int FALLBACK
-    op op_not_int FALLBACK
-    op op_neg_long FALLBACK
-    op op_not_long FALLBACK
-    op op_neg_float FALLBACK
-    op op_neg_double FALLBACK
-    op op_int_to_long FALLBACK
-    op op_int_to_float FALLBACK
-    op op_int_to_double FALLBACK
-    op op_long_to_int FALLBACK
-    op op_long_to_float FALLBACK
-    op op_long_to_double FALLBACK
-    op op_float_to_int FALLBACK
-    op op_float_to_long FALLBACK
-    op op_float_to_double FALLBACK
-    op op_double_to_int FALLBACK
-    op op_double_to_long FALLBACK
-    op op_double_to_float FALLBACK
-    op op_int_to_byte FALLBACK
-    op op_int_to_char FALLBACK
-    op op_int_to_short FALLBACK
-    op op_add_int FALLBACK
-    op op_sub_int FALLBACK
-    op op_mul_int FALLBACK
-    op op_div_int FALLBACK
-    op op_rem_int FALLBACK
-    op op_and_int FALLBACK
-    op op_or_int FALLBACK
-    op op_xor_int FALLBACK
-    op op_shl_int FALLBACK
-    op op_shr_int FALLBACK
-    op op_ushr_int FALLBACK
-    op op_add_long FALLBACK
-    op op_sub_long FALLBACK
-    op op_mul_long FALLBACK
-    op op_div_long FALLBACK
-    op op_rem_long FALLBACK
-    op op_and_long FALLBACK
-    op op_or_long FALLBACK
-    op op_xor_long FALLBACK
-    op op_shl_long FALLBACK
-    op op_shr_long FALLBACK
-    op op_ushr_long FALLBACK
-    op op_add_float FALLBACK
-    op op_sub_float FALLBACK
-    op op_mul_float FALLBACK
-    op op_div_float FALLBACK
-    op op_rem_float FALLBACK
-    op op_add_double FALLBACK
-    op op_sub_double FALLBACK
-    op op_mul_double FALLBACK
-    op op_div_double FALLBACK
-    op op_rem_double FALLBACK
-    op op_add_int_2addr FALLBACK
-    op op_sub_int_2addr FALLBACK
-    op op_mul_int_2addr FALLBACK
-    op op_div_int_2addr FALLBACK
-    op op_rem_int_2addr FALLBACK
-    op op_and_int_2addr FALLBACK
-    op op_or_int_2addr FALLBACK
-    op op_xor_int_2addr FALLBACK
-    op op_shl_int_2addr FALLBACK
-    op op_shr_int_2addr FALLBACK
-    op op_ushr_int_2addr FALLBACK
-    op op_add_long_2addr FALLBACK
-    op op_sub_long_2addr FALLBACK
-    op op_mul_long_2addr FALLBACK
-    op op_div_long_2addr FALLBACK
-    op op_rem_long_2addr FALLBACK
-    op op_and_long_2addr FALLBACK
-    op op_or_long_2addr FALLBACK
-    op op_xor_long_2addr FALLBACK
-    op op_shl_long_2addr FALLBACK
-    op op_shr_long_2addr FALLBACK
-    op op_ushr_long_2addr FALLBACK
-    op op_add_float_2addr FALLBACK
-    op op_sub_float_2addr FALLBACK
-    op op_mul_float_2addr FALLBACK
-    op op_div_float_2addr FALLBACK
-    op op_rem_float_2addr FALLBACK
-    op op_add_double_2addr FALLBACK
-    op op_sub_double_2addr FALLBACK
-    op op_mul_double_2addr FALLBACK
-    op op_div_double_2addr FALLBACK
-    op op_rem_double_2addr FALLBACK
-    op op_add_int_lit16 FALLBACK
-    op op_rsub_int FALLBACK
-    op op_mul_int_lit16 FALLBACK
-    op op_div_int_lit16 FALLBACK
-    op op_rem_int_lit16 FALLBACK
-    op op_and_int_lit16 FALLBACK
-    op op_or_int_lit16 FALLBACK
-    op op_xor_int_lit16 FALLBACK
-    op op_add_int_lit8 FALLBACK
-    op op_rsub_int_lit8 FALLBACK
-    op op_mul_int_lit8 FALLBACK
-    op op_div_int_lit8 FALLBACK
-    op op_rem_int_lit8 FALLBACK
-    op op_and_int_lit8 FALLBACK
-    op op_or_int_lit8 FALLBACK
-    op op_xor_int_lit8 FALLBACK
-    op op_shl_int_lit8 FALLBACK
-    op op_shr_int_lit8 FALLBACK
-    op op_ushr_int_lit8 FALLBACK
-    op op_iget_quick FALLBACK
-    op op_iget_wide_quick FALLBACK
-    op op_iget_object_quick FALLBACK
-    op op_iput_quick FALLBACK
-    op op_iput_wide_quick FALLBACK
-    op op_iput_object_quick FALLBACK
-    op op_invoke_virtual_quick FALLBACK
-    op op_invoke_virtual_range_quick FALLBACK
-    op op_iput_boolean_quick FALLBACK
-    op op_iput_byte_quick FALLBACK
-    op op_iput_char_quick FALLBACK
-    op op_iput_short_quick FALLBACK
-    op op_iget_boolean_quick FALLBACK
-    op op_iget_byte_quick FALLBACK
-    op op_iget_char_quick FALLBACK
-    op op_iget_short_quick FALLBACK
-    op_unused_f3 FALLBACK
-    op_unused_f4 FALLBACK
-    op_unused_f5 FALLBACK
-    op_unused_f6 FALLBACK
-    op_unused_f7 FALLBACK
-    op_unused_f8 FALLBACK
-    op_unused_f9 FALLBACK
-    op_unused_fa FALLBACK
-    op_unused_fb FALLBACK
-    op_unused_fc FALLBACK
-    op_unused_fd FALLBACK
-    op_unused_fe FALLBACK
-    op_unused_ff FALLBACK
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
 op-end
 
 # common subroutines for asm
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
new file mode 100644
index 0000000..e9d28ab
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -0,0 +1,11726 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm64'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm64/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
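+
+/*
+ * Illustrative usage of the OFF_FP offsets (patterns taken from the opcode
+ * handlers below); not part of the macro definitions themselves:
+ *
+ *     ldr   x1, [xFP, #OFF_FP_METHOD]         // x1<- method (ArtMethod*)
+ *     add   x0, xFP, #OFF_FP_SHADOWFRAME      // x0<- ShadowFrame*
+ */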
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+/*
+ * Fetch the next instruction from xPC into wINST.  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and xINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * xINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]
+.endm
+
+/* Advance xPC by some number of code units. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg, advancing xPC
+ * to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]
+.endm
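+
+/*
+ * Sketch (mirroring the cmp handlers below): fetch the CCBB code unit and
+ * split out the two source register indices.
+ *
+ *     FETCH w0, 1                         // w0<- CCBB
+ *     and     w2, w0, #255                // w2<- BB
+ *     lsr     w3, w0, #8                  // w3<- CC
+ */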
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Clobbers _reg.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #7
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #7
+    br      \reg
+.endm
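+
+/*
+ * Sketch of the standard handler tail, as emitted for the handlers below:
+ *
+ *     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+ *     GET_INST_OPCODE ip                  // ip<- opcode from wINST
+ *     GOTO_OPCODE ip                      // jump to the 128-byte-aligned handler
+ */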
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]
+.endm
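+
+/*
+ * Sketch of a plain 32-bit register move (as in op_move below): SET_VREG clears
+ * the matching slot in the reference array, while SET_VREG_OBJECT mirrors the
+ * value into it.
+ *
+ *     GET_VREG w2, w1                     // w2<- fp[B]
+ *     SET_VREG w2, w0                     // fp[A]<- w2, refs[A]<- 0
+ */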
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2
+    str     xzr, [ip2]
+.endm
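+
+/*
+ * Sketch of a wide move (as in op_move_wide below); the register pair is
+ * addressed through ip2.
+ *
+ *     GET_VREG_WIDE x3, w3                // x3<- fp[B], fp[B+1]
+ *     SET_VREG_WIDE x3, w2                // fp[A], fp[A+1]<- x3; refs cleared
+ */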
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm64/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ *
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xIBASE, xREFS, [sp, #-64]!
+    stp     xSELF, xINST, [sp, #16]
+    stp     xPC, xFP, [sp, #32]
+    stp     fp, lr, [sp, #48]
+    add     fp, sp, #48
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs[] (base of the Dalvik registers).
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          // load wINST from rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm64/op_nop.S */
+    FETCH_ADVANCE_INST 1                // advance to next instr, load rINST
+    GET_INST_OPCODE ip                  // ip<- opcode from rINST
+    GOTO_OPCODE ip                      // execute it
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // r1<- BBBB
+    lsr     w0, wINST, #8               // r0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- r2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm64/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm64/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm64/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    SET_VREG_WIDE x3, w2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm64/op_move_object.S */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm64/op_move_object_from16.S */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // r1<- BBBB
+    lsr     w0, wINST, #8               // r0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- r2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm64/op_move_object_16.S */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load xINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    .if 1
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     w0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- r0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm64/op_move_result_wide.S */
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     x0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, x2                // fp[AA]<- r0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm64/op_move_result_object.S */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // r2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // get pointer to result JValue.
+    ldr     w0, [x0]                    // r0 <- result.i.
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- r0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm64/op_move_exception.S */
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     x1, #0                      // w1<- 0
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm64/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_check
+.Lop_return_void_return:
+    mov     x0, #0
+    b       MterpReturn
+.Lop_return_void_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_check
+.Lop_return_return:
+    lsr     w2, wINST, #8               // r2<- AA
+    GET_VREG w0, w2                     // r0<- vAA
+    b       MterpReturn
+.Lop_return_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm64/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_wide_check
+.Lop_return_wide_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.Lop_return_wide_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_wide_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm64/op_return_object.S */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_object_check
+.Lop_return_object_return:
+    lsr     w2, wINST, #8               // r2<- AA
+    GET_VREG w0, w2                     // r0<- vAA
+    b       MterpReturn
+.Lop_return_object_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_object_return
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm64/op_const_4.S */
+    /* const/4 vA, #+B */
+    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  // ip<- opcode from xINST
+    SET_VREG w1, w0                     // fp[A]<- w1
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm64/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm64/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm64/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // r3<- AA
+    lsl     w0, w0, #16                 // r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    SET_VREG w0, w3                     // vAA<- r0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm64/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sbfm    x0, x0, 0, 31
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm64/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    sbfm    x0, x0, 0, 31
+    SET_VREG_WIDE x0, w3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm64/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // r4<- AA
+    FETCH_ADVANCE_INST 5                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // w0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // w0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm64/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    lsl     x0, x0, #48
+    SET_VREG_WIDE x0, w1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm64/op_const_string.S */
+    /* const/string vAA, String//BBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load rINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/op_const_string_jumbo.S */
+    /* const/string vAA, String//BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // load rINST
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm64/op_const_class.S */
+    /* const/class vAA, Class//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME
+    mov     x3, xSELF
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cbnz    w0, MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm64/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // w1<- self
+    bl       artLockObjectFromCode
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   // extract opcode from rINST
+    GOTO_OPCODE ip                       // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm64/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // x1<- self
+    bl       artUnlockObjectFromCode    // w0<- success for unlock(self, obj)
+    cbnz     w0, MterpException
+    FETCH_ADVANCE_INST 1                // before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm64/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // w2<- method
+    mov      x3, xSELF                  // w3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm64/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // w1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // w2<- method
+    mov       x3, xSELF                 // w3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr       w2, wINST, #8             // w2<- A+
+    and       w2, w2, #15               // w2<- A
+    PREFETCH_INST 2
+    cbnz      x1, MterpException
+    ADVANCE 2                           // advance rPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm64/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // yup, fail
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w3, w2                     // vA<- length
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm64/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xSELF
+    mov     w2, wINST
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2               // advance rPC, load rINST
+    GET_INST_OPCODE ip                 // extract opcode from rINST
+    GOTO_OPCODE ip                     // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm64/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      MterpFilledNewArray
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/op_filled_new_array_range.S */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xSELF
+    bl      MterpFilledNewArrayRange
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm64/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, w1, lsl #1         // w1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // exception?
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm64/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // r2<- AA
+    GET_VREG w1, w2                      // r1<- vAA (exception object)
+    cbz      w1, common_errNullObject
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm64/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
+    add     w2, w1, w1                  // w2<- byte offset, set flags
+       // If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
+    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+       // If backwards branch refresh rIBASE
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm64/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm64/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm64/op_sparse_switch.S */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm64/op_cmpl_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2
+    GET_VREG s2, w3
+    mov     w0, #-1
+    fcmp s1, s2
+    csneg w0, w0, w0, le
+    csel w0, wzr, w0, eq
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm64/op_cmpg_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2
+    GET_VREG s2, w3
+    mov     w0, #1
+    fcmp s1, s2
+    csneg w0, w0, w0, pl
+    csel w0, wzr, w0, eq
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm64/op_cmpl_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2
+    GET_VREG_WIDE d2, w3
+    mov     w0, #-1
+    fcmp d1, d2
+    csneg w0, w0, w0, le
+    csel w0, wzr, w0, eq
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm64/op_cmpg_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2
+    GET_VREG_WIDE d2, w3
+    mov     w0, #1
+    fcmp d1, d2
+    csneg w0, w0, w0, pl
+    csel w0, wzr, w0, eq
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm64/op_cmp_long.S */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2
+    GET_VREG_WIDE x2, w3
+    cmp     x1, x2
+    csinc   w0, wzr, wzr, eq
+    csneg   w0, w0, w0, ge
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG w0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm64/op_if_eq.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, eq    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm64/op_if_ne.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movne w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, ne    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm64/op_if_lt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, lt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm64/op_if_ge.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movge w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, ge    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm64/op_if_gt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, gt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm64/op_if_le.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movle w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    w1, w1, w0, le    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm64/op_if_eqz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    moveq w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, eq    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
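+
+    /*
+     * The if-cmpz handlers follow the same shape, comparing vAA against an
+     * immediate zero instead of a second register (pseudo-code only):
+     *
+     *   int32_t off = (int16_t) BBBB;
+     *   if (!(vAA COND 0))
+     *       off = 2;
+     *   pc += off * 2;
+     *   if (off < 0)
+     *       check_for_suspend();
+     */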
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm64/op_if_nez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movne w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, ne    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm64/op_if_ltz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movlt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, lt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm64/op_if_gez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movge w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, ge    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm64/op_if_gtz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movgt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, gt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm64/op_if_lez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movle w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    w1, w1, w0, le    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm64/op_unused_3e.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm64/op_unused_3f.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm64/op_unused_40.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm64/op_unused_41.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm64/op_unused_42.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm64/op_unused_43.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #2    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldr   w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
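+
+    /*
+     * Roughly, in C terms (pseudo-code only; the MIRROR_* offsets are the
+     * constants used above, and the element width is 4 bytes for plain aget):
+     *
+     *   if (array == NULL)                     goto null_object_error;
+     *   if ((uint32_t) index >= array->length) goto index_error;
+     *   vAA = *(int32_t *) ((char *) array + MIRROR_INT_ARRAY_DATA_OFFSET + index * 4);
+     */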
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm64/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject        // bail if null array object
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm64/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     w1, MterpException
+    SET_VREG_OBJECT w0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm64/op_aget_boolean.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrb   w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm64/op_aget_byte.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsb   w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm64/op_aget_char.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrh   w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm64/op_aget_short.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsh   w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #2     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str  w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
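+
+    /*
+     * The aput family mirrors aget, with the final load replaced by a store of
+     * vAA (pseudo-code only; element width varies per variant):
+     *
+     *   if (array == NULL)                     goto null_object_error;
+     *   if ((uint32_t) index >= array->length) goto index_error;
+     *   *(int32_t *) ((char *) array + MIRROR_INT_ARRAY_DATA_OFFSET + index * 4) = vAA;
+     */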
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm64/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    GET_VREG_WIDE x1, w4
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm64/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpAputObject
+    cbz     w0, MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm64/op_aput_boolean.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #0     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm64/op_aput_byte.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #0     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm64/op_aput_char.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #1     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm64/op_aput_short.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #1     // w0<- arrayObj + index*width
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet32InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
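+
+    /*
+     * In outline, the iget handlers call out to the runtime rather than
+     * inlining the field access (pseudo-code only; the argument order follows
+     * the register setup above):
+     *
+     *   result = artGet32InstanceFromCode(field_ref_CCCC, fp[B], referrer, self);
+     *   if (self->exception != NULL) goto possible_exception;
+     *   fp[A] = result;                 // SET_VREG_OBJECT for the object variant
+     */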
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm64/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cmp      w3, #0
+    cbnz     w3, MterpException            // bail out
+    SET_VREG_WIDE x0, w2
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm64/op_iget_object.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 1
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm64/op_iget_boolean.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm64/op_iget_byte.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm64/op_iget_char.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm64/op_iget_short.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
+    mov      x3, xSELF                     // w3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
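+
+    /*
+     * iput is the store-side counterpart of iget (pseudo-code only; the
+     * argument order follows the register setup above):
+     *
+     *   if (artSet32InstanceFromMterp(field_ref_CCCC, fp[B], fp[A], referrer) != 0)
+     *       goto possible_exception;
+     */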
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm64/op_iput_wide.S */
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm64/op_iput_object.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    mov     x3, xSELF
+    bl      MterpIputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm64/op_iput_boolean.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm64/op_iput_byte.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm64/op_iput_char.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm64/op_iput_short.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet32StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
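+
+    /*
+     * The sget family is the same call-out pattern without an object argument
+     * (pseudo-code only; the argument order follows the register setup above):
+     *
+     *   result = artGet32StaticFromCode(field_ref_BBBB, referrer, self);
+     *   if (self->exception != NULL) goto exception;
+     *   fp[AA] = result;                // SET_VREG_OBJECT for sget-object
+     */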
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm64/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w4
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm64/op_sget_object.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetObjStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 1
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm64/op_sget_boolean.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetBooleanStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxtb w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm64/op_sget_byte.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetByteStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxtb w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm64/op_sget_char.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetCharStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxth w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm64/op_sget_short.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]
+    mov   x2, xSELF
+    bl    artGetShortStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxth w0, w0
+    PREFETCH_INST 2
+    cbnz  x3, MterpException            // bail out
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet32StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
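+
+    /*
+     * sput follows the same call-out pattern on the store side (pseudo-code
+     * only; the argument order follows the register setup above):
+     *
+     *   if (artSet32StaticFromCode(field_ref_BBBB, fp[AA], referrer, self) != 0)
+     *       goto exception;
+     */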
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm64/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]
+    lsr     w2, wINST, #8               // w2<- AA
+    add     x2, xFP, w2, lsl #2
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm64/op_sput_object.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     x2, xINST
+    mov     x3, xSELF
+    bl      MterpSputObject
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm64/op_sput_boolean.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm64/op_sput_byte.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm64/op_sput_char.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm64/op_sput_short.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]
+    mov     x3, xSELF
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm64/op_invoke_virtual.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeVirtual
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
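+    /*
+     * Illustrative sketch (not part of the generated handler): every invoke
+     * variant funnels into a C++ helper with the same argument layout, e.g.
+     *   ok = MterpInvokeVirtual(self, &shadow_frame, dex_pc_ptr, inst);
+     * A zero return means an exception is pending; otherwise the interpreter
+     * advances past the 3 code units of the invoke and dispatches the next opcode.
+     */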
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm64/op_invoke_super.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeSuper
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm64/op_invoke_direct.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeDirect
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm64/op_invoke_static.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeStatic
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm64/op_invoke_interface.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeInterface
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/op_return_void_no_barrier.S */
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    mov     x0, xSELF
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_no_barrier_check
+.Lop_return_void_no_barrier_return:
+    mov     x0, #0
+    b       MterpReturn
+.Lop_return_void_no_barrier_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_no_barrier_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/op_invoke_virtual_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeVirtualRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm64/op_invoke_super_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeSuperRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/op_invoke_direct_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeDirectRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm64/op_invoke_static_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeStaticRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/op_invoke_interface_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeInterfaceRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm64/op_unused_79.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm64/op_unused_7a.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm64/op_neg_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sub     w0, wzr, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
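+    /*
+     * Illustrative sketch (not part of the generated handler): the unop template
+     * above decodes "unop vA, vB" as B = inst[15:12], A = inst[11:8] and computes
+     *   vA = op(vB);                 // here op is "sub w0, wzr, w0", 32-bit negate
+     * before advancing 1 code unit and dispatching the next opcode.
+     */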
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm64/op_not_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    mvn     w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm64/op_neg_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    
+    sub x0, xzr, x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
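+    /*
+     * Illustrative sketch (not part of the generated handler): the wide unop
+     * template has the same shape but moves a 64-bit vreg pair:
+     *   vA/vA+1 = op(vB/vB+1);       // here "sub x0, xzr, x0", 64-bit negate
+     */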
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm64/op_not_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    
+    mvn     x0, x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm64/op_neg_float.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    mov w4, #0x80000000                 // w4<- sign-bit mask
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    add     w0, w0, w4                  // flip bit 31 (float negate); carry out of bit 31 is discarded
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm64/op_neg_double.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    mov x1, #0x8000000000000000         // x1<- sign-bit mask
+    add     x0, x0, x1                  // flip bit 63 (double negate); carry out of bit 63 is discarded
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm64/op_int_to_long.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op w0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    sbfm x0, x0, 0, 31                              // x0<- sign-extend(w0)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
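+    /*
+     * Illustrative sketch (not part of the generated handler): int-to-long reuses
+     * the 32-to-64-bit template with a plain integer op:
+     *   vA/vA+1 = (int64_t)(int32_t)vB;   // sbfm x0, x0, #0, #31 sign-extends w0
+     */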
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm64/op_int_to_float.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op w0".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf s0, w0                              // s0<- (float)w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
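+    /*
+     * Illustrative sketch (not part of the generated handler): the 32-to-32-bit
+     * conversion template computes
+     *   vA = (float)(int32_t)vB;     // scvtf s0, w0
+     * and writes the single-precision result back to one vreg.
+     */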
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm64/op_int_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op w0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf d0, w0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm64/op_long_to_int.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op x0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+                                  // no instruction needed: w0 is the low half of x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
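+    /*
+     * Illustrative sketch (not part of the generated handler): long-to-int needs
+     * no conversion instruction; the template simply stores the low 32 bits:
+     *   vA = (int32_t)vB_pair;       // w0 is already the low half of x0
+     */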
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm64/op_long_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op x0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf s0, x0                              // s0<- (float)x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm64/op_long_to_double.S */
+/* File: arm64/funopWide.S */
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op x0".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf d0, x0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
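+    /*
+     * Illustrative sketch (not part of the generated handler): the 64-to-64-bit
+     * template covers both directions between long and double, here
+     *   vA/vA+1 = (double)(int64_t)(vB/vB+1);   // scvtf d0, x0
+     */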
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm64/op_float_to_int.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op s0".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs w0, s0                              // w0<- (int)s0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm64/op_float_to_long.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op s0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs x0, s0                              // x0<- (long)s0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm64/op_float_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG s0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvt  d0, s0                              // d0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm64/op_double_to_int.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op d0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs w0, d0                              // w0<- (int)d0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm64/op_double_to_long.S */
+/* File: arm64/funopWide.S */
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op d0".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs x0, d0                              // x0<- (long)d0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm64/op_double_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE d0, w3
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvt s0, d0                              // s0<- (float)d0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm64/op_int_to_byte.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sxtb    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm64/op_int_to_char.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    uxth    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm64/op_int_to_short.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sxth    w0, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm64/op_add_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
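+    /*
+     * Illustrative sketch (not part of the generated handler): the binop template
+     * decodes "binop vAA, vBB, vCC" as AA = inst[15:8], BB = fetch[7:0],
+     * CC = fetch[15:8] and computes
+     *   if (chkzero && vCC == 0) goto common_errDivideByZero;  // chkzero is 0 for add-int
+     *   vAA = vBB op vCC;                                      // here "add w0, w0, w1"
+     * before advancing 2 code units and dispatching the next opcode.
+     */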
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm64/op_sub_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm64/op_mul_int.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm64/op_div_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm64/op_rem_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv     w2, w0, w1                           // optional op; may set condition codes
+    msub w0, w2, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
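+    /*
+     * Illustrative sketch (not part of the generated handler): AArch64 has no
+     * integer remainder instruction, so rem-int is computed as
+     *   w2 = vBB / vCC;              // sdiv
+     *   vAA = vBB - w2 * vCC;        // msub, i.e. vAA = vBB % vCC
+     * after the divide-by-zero check (chkzero is 1 here).
+     */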
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm64/op_and_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm64/op_or_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm64/op_xor_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm64/op_shl_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm64/op_shr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm64/op_ushr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm64/op_add_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // w2<- vCC
+    GET_VREG_WIDE x1, w1               // w1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    add x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
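+    /*
+     * Illustrative sketch (not part of the generated handler): the wide binop
+     * template is the 64-bit counterpart of binop above; it loads vBB and vCC
+     * as register pairs into x1/x2 and stores
+     *   vAA/vAA+1 = (vBB pair) op (vCC pair);   // here "add x0, x1, x2"
+     */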
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm64/op_sub_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // w2<- vCC
+    GET_VREG_WIDE x1, w1               // w1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sub x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm64/op_mul_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // w2<- vCC
+    GET_VREG_WIDE x1, w1               // w1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    mul x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm64/op_div_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sdiv x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
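+    /*
+     * AArch64 sdiv does not trap on a zero divisor (it simply produces 0), so
+     * the cbz check above is what routes a zero vCC to common_errDivideByZero
+     * and the divide-by-zero exception path.
+     */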
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm64/op_rem_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv x3, x1, x2
+    msub x0, x3, x2, x1                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
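+    /*
+     * There is no 64-bit remainder instruction, so the sdiv/msub pair above
+     * computes x1 - (x1 / x2) * x2; the result keeps the sign of the dividend,
+     * as rem-long requires.
+     */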
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm64/op_and_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    and x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm64/op_or_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    orr x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm64/op_xor_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    eor x0, x1, x2                              // x0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm64/op_shl_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // Mask low 6 bits.
+    lsl  x0, x1, x2                 // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
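+    /*
+     * For the 64-bit shifts (shl/shr/ushr-long) only the low six bits of vCC
+     * are significant, hence the "and x2, x2, #63" mask before shifting.
+     */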
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm64/op_shr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // Mask low 6 bits.
+    asr  x0, x1, x2                 // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm64/op_ushr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * 64-bit shift operation.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // Mask low 6 bits.
+    lsr  x0, x1, x2                 // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm64/op_add_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fadd   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm64/op_sub_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fsub   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm64/op_mul_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fmul   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm64/op_div_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    fdiv   s0, s0, s1                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm64/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm64/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1
+    GET_VREG  s0, w0
+    bl      fmodf                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
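+    /*
+     * fmodf is called per AAPCS64: operands in s0/s1, result back in s0, which
+     * SET_VREG then stores; AA is read from wINST only after the call returns.
+     */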
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm64/op_add_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fadd d0, d1, d2                              // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm64/op_sub_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fsub d0, d1, d2                              // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm64/op_mul_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fmul d0, d1, d2                              // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm64/op_div_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = x1 op x2".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fdiv d0, d1, d2                              // d0<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm64/op_rem_double.S */
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/op_add_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/op_sub_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/op_mul_int_2addr.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/op_div_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/op_rem_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv     w2, w0, w1                           // optional op; may set condition codes
+    msub w0, w2, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
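+    /*
+     * As with rem-long, msub forms the remainder from the sdiv quotient:
+     * w0<- w0 - (w0 / w1) * w1, keeping the sign of the dividend.
+     */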
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/op_and_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/op_or_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/op_xor_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/op_shl_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/op_shr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/op_ushr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm64/op_add_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    add     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/op_sub_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sub     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/op_mul_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    mul     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm64/op_div_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sdiv     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/op_rem_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv x3, x0, x1
+    msub x0, x3, x1, x0                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/op_and_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    and     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/op_or_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    orr     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/op_xor_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    eor     x0, x0, x1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/op_shl_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     x1, x1, #63                 // Mask low 6 bits.
+    lsl x0, x0, x1
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/op_shr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     x1, x1, #63                 // Mask low 6 bits.
+    asr x0, x0, x1
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/op_ushr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     x1, x1, #63                 // Mask low 6 bits.
+    lsr x0, x0, x1
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/op_add_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fadd   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/op_sub_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fsub   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/op_mul_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fmul   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/op_div_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    fdiv   s2, s0, s1                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
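
Note (editorial): the four handlers above are instantiations of fbinop2addr and simply compute "vA = vA op vB" on single-precision values. A hedged C++ equivalent, with hypothetical names:

    // Each line mirrors the fadd/fsub/fmul/fdiv instruction in the handler body.
    inline float AddFloat2Addr(float vA, float vB) { return vA + vB; }
    inline float SubFloat2Addr(float vA, float vB) { return vA - vB; }
    inline float MulFloat2Addr(float vA, float vB) { return vA * vB; }
    inline float DivFloat2Addr(float vA, float vB) { return vA / vB; }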
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm64/op_rem_float_2addr.S */
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3
+    GET_VREG s0, w9
+    bl  fmodf
+    lsr     w9, wINST, #8               // w9<- A+ (need to reload - killed across call)
+    and     w9, w9, #15                 // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9
+    GOTO_OPCODE ip                      // jump to next instruction
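
Note (editorial): unlike the other float /2addr operations, rem-float has no single AArch64 instruction, so the handler above calls out to libm's fmodf and follows C's truncated-remainder semantics. Roughly, as an illustrative sketch:

    #include <cmath>

    // vA = fmodf(vA, vB); the result keeps the sign of the dividend,
    // e.g. RemFloat2Addr(-5.5f, 2.0f) == -1.5f.
    inline float RemFloat2Addr(float vA, float vB) { return std::fmod(vA, vB); }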
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm64/op_add_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fadd     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
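
Note (editorial): the ".if 0" / ".endif" pair above is the binopWide2addr template's chkzero slot. For floating-point instantiations the flag is 0, so the cbz line is never assembled at all; integer div/mod instantiations set it to 1 and branch to common_errDivideByZero when vB is zero. A C++ sketch of the guarded form (the exception path is a stand-in, not the real helper):

    #include <cstdint>
    #include <stdexcept>

    // Sketch of a chkzero-enabled instantiation such as div-long/2addr.
    inline int64_t DivLong2Addr(int64_t vA, int64_t vB) {
      if (vB == 0) {
        throw std::runtime_error("divide by zero");  // stands in for common_errDivideByZero
      }
      return vA / vB;
    }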
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/op_sub_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fsub     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/op_mul_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fmul     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm64/op_div_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fdiv     d0, d0, d1                              // result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/op_rem_double_2addr.S */
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    bl fmod
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/op_add_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
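
Note (editorial): in the lit16 forms, CCCC is fetched with FETCH_S and is therefore already sign-extended before the operation, so add-int/lit16 computes vA = vB + sign_extend16(CCCC). An illustrative C++ version, with hypothetical names:

    #include <cstdint>

    // CCCC is the 16-bit literal from the second code unit of the instruction.
    inline int32_t AddIntLit16(int32_t vB, uint16_t CCCC) {
      return vB + static_cast<int16_t>(CCCC);   // e.g. CCCC == 0xFFFF adds -1
    }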
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm64/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sub     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/op_mul_int_lit16.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/op_div_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sdiv w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/op_rem_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv w3, w0, w1
+    msub w0, w3, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
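
Note (editorial): AArch64 has no integer remainder instruction, so the rem handlers compute the quotient with sdiv and fold it back with msub, i.e. rem = w0 - (w0 / w1) * w1. In C++ terms, assuming the divisor has already passed the zero check:

    #include <cstdint>

    // Mirrors "sdiv w3, w0, w1; msub w0, w3, w1, w0".
    inline int32_t RemInt(int32_t dividend, int32_t divisor) {
      int32_t quotient = dividend / divisor;    // sdiv
      return dividend - quotient * divisor;     // msub
    }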
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/op_and_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/op_or_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/op_xor_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/op_add_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    add     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
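
Note (editorial): the lit8 forms pack both operands into one code unit: FETCH_S loads ssssCCBB, the low byte is the source register index BB, and the high byte is the literal CC, recovered with an arithmetic shift so it stays sign-extended. A hedged decoding sketch with hypothetical names:

    #include <cstdint>

    // Decodes the second code unit of a 22b instruction the same way the handler does.
    inline void DecodeLit8(int16_t ssssCCBB, uint32_t* BB, int32_t* CC) {
      *BB = static_cast<uint16_t>(ssssCCBB) & 0xffu;   // and w2, w3, #255
      *CC = ssssCCBB >> 8;                             // asr w1, w3, #8 (sign-extended)
    }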
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/op_rsub_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sub     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm64/op_mul_int_lit8.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    mul     w0, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm64/op_div_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    sdiv     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/op_rem_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv w3, w0, w1                           // optional op; may set condition codes
+    msub w0, w3, w1, w0                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm64/op_and_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    and     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm64/op_or_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    orr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/op_xor_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional op; may set condition codes
+    eor     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/op_shl_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/op_shr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    asr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/op_ushr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // optional op; may set condition codes
+    lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    ldr   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
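
Note (editorial): the -quick field opcodes carry a precomputed byte offset directly in the instruction stream, so the handler above only needs a null check plus a single load at object + offset. An illustrative C++ sketch; the names are hypothetical, and the real handler branches to common_errNullObject rather than returning a default value:

    #include <cstdint>
    #include <cstring>

    // "obj" is the vB object pointer, "offset" is the CCCC field byte offset.
    inline int32_t IGetQuick(const uint8_t* obj, uint32_t offset) {
      if (obj == nullptr) {
        return 0;  // placeholder for the null-object exception path
      }
      int32_t value;
      std::memcpy(&value, obj + offset, sizeof(value));   // ldr w0, [x3, x1]
      return value;
    }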
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject        // object was null
+    add     x4, x3, x4                  // create direct pointer
+    ldr     x0, [x4]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/op_iget_object_quick.S */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    PREFETCH_INST 2
+    cbnz    w3, MterpPossibleException      // bail out
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    str     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cmp     w2, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    add     x1, x2, x3                  // create a direct pointer
+    str     x0, [x1]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/op_iput_object_quick.S */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME
+    mov     x1, xPC
+    mov     w2, wINST
+    bl      MterpIputObjectQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/op_invoke_virtual_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeVirtualQuick
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/op_invoke_virtual_range_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST
+    bl      MterpInvokeVirtualQuickRange
+    cbz     w0, MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/op_iput_boolean_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm64/op_iput_byte_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm64/op_iput_char_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm64/op_iput_short_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/op_iget_boolean_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    ldrb   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/op_iget_byte_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    ldrsb   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/op_iget_char_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    ldrh   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/op_iget_short_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    ldrsh   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm64/op_unused_f4.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm64/op_unused_fa.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm64/op_unused_fb.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm64/op_unused_fc.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm64/op_unused_fd.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm64/op_unused_fe.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm64/op_unused_ff.S */
+/* File: arm64/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (0 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (1 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (2 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (3 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (4 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (5 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (6 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (7 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (8 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (9 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (10 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (11 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (12 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (13 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (14 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (15 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (16 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (17 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (18 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (19 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (20 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (21 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (22 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (23 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (24 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (25 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (26 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (27 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (28 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (29 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (30 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (31 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (32 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (33 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (34 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (35 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (36 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (37 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (38 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (39 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (40 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (41 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (42 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (43 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (44 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (45 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (46 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (47 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (48 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (49 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (50 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (51 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (52 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (53 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (54 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (55 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (56 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (57 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (58 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (59 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (60 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (61 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (62 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (63 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (64 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (65 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (66 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (67 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (68 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (69 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (70 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (71 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (72 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (73 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (74 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (75 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (76 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (77 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (78 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (79 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (80 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (81 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (82 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (83 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (84 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (85 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (86 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (87 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (88 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (89 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (90 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (91 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (92 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (93 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (94 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (95 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (96 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (97 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (98 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (99 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (100 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (101 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (102 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (103 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (104 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (105 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (106 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (107 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (108 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (109 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (110 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (111 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (112 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (113 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (114 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (115 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (116 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (117 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (118 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (119 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (120 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (121 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (122 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (123 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (124 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (125 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (126 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (127 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (128 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (129 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (130 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (131 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (132 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (133 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (134 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (135 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (136 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (137 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (138 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (139 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (140 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (141 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (142 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (143 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (144 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (145 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (146 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (147 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (148 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (149 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (150 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (151 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (152 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (153 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (154 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (155 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (156 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (157 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (158 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (159 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (160 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (161 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (162 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (163 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (164 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (165 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (166 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (167 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (168 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (169 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (170 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (171 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (172 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (173 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (174 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (175 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (176 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (177 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (178 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (179 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (180 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (181 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (182 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (183 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (184 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (185 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (186 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (187 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (188 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (189 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (190 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (191 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (192 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (193 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (194 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (195 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (196 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (197 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (198 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (199 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (200 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (201 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (202 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (203 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (204 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (205 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (206 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (207 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (208 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (209 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (210 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (211 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (212 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (213 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (214 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (215 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (216 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (217 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (218 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (219 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (220 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (221 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (222 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (223 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (224 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (225 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (226 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (227 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (228 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (229 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (230 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (231 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (232 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (233 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (234 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (235 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (236 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (237 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (238 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (239 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (240 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (241 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (242 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (243 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (244 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (245 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (246 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (247 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (248 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (249 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (250 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (251 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (252 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (253 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (254 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (255 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: arm64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+    ldp     fp, lr, [sp, #48]
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
index 8b26976..ac87945 100755
--- a/runtime/interpreter/mterp/rebuild.sh
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -21,4 +21,4 @@
 set -e
 
 # for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
-for arch in arm x86; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm x86 arm64 ; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 4e0146c..3e152e1 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -25,10 +25,12 @@
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
 #include "oat_file_manager.h"
+#include "oat_quick_method_header.h"
 #include "offline_profiling_info.h"
 #include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
+#include "stack_map.h"
 #include "utils.h"
 
 namespace art {
@@ -43,6 +45,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  // TODO(ngeoffray): Make this a proper option.
+  jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
   jit_options->warmup_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
@@ -121,7 +125,7 @@
     *error_msg = "JIT couldn't find jit_unload entry point";
     return false;
   }
-  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*)>(
+  jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*, bool)>(
       dlsym(jit_library_handle_, "jit_compile_method"));
   if (jit_compile_method_ == nullptr) {
     dlclose(jit_library_handle_);
@@ -156,7 +160,7 @@
   return true;
 }
 
-bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
+bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
   DCHECK(!method->IsRuntimeMethod());
   // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
@@ -171,10 +175,11 @@
     return false;
   }
 
-  if (!code_cache_->NotifyCompilationOf(method, self)) {
+  if (!code_cache_->NotifyCompilationOf(method, self, osr)) {
+    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
     return false;
   }
-  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr);
   code_cache_->DoneCompiling(method, self);
   return success;
 }
@@ -224,9 +229,11 @@
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold,
+                                     size_t warmup_threshold,
+                                     size_t osr_threshold) {
   instrumentation_cache_.reset(
-      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
+      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold, osr_threshold));
 }
 
 void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
@@ -239,7 +246,7 @@
 
 void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
   struct CollectClasses : public ClassVisitor {
-    bool Visit(mirror::Class* klass) override {
+    bool operator()(mirror::Class* klass) override {
       classes_.push_back(klass);
       return true;
     }
@@ -255,5 +262,120 @@
   }
 }
 
+extern "C" void art_quick_osr_stub(void** stack,
+                                   uint32_t stack_size_in_bytes,
+                                   const uint8_t* native_pc,
+                                   JValue* result,
+                                   const char* shorty,
+                                   Thread* self);
+
+bool Jit::MaybeDoOnStackReplacement(Thread* thread,
+                                    ArtMethod* method,
+                                    uint32_t dex_pc,
+                                    int32_t dex_pc_offset,
+                                    JValue* result) {
+  Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return false;
+  }
+
+  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
+    VLOG(jit) << "OSR not supported on this platform";
+    return false;
+  }
+
+  // Cheap check if the method has been compiled already. That's an indicator that we should
+  // osr into it.
+  if (!jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+
+  const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
+  if (osr_method == nullptr) {
+    // No osr method yet, just return to the interpreter.
+    return false;
+  }
+
+  const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
+  CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+
+  // Find stack map starting at the target dex_pc.
+  StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
+  if (!stack_map.IsValid()) {
+    // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
+    // hope that the next branch has one.
+    return false;
+  }
+
+  // We found a stack map, now fill the frame with dex register values from the interpreter's
+  // shadow frame.
+  DexRegisterMap vreg_map =
+      code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+  ShadowFrame* shadow_frame = thread->PopShadowFrame();
+
+  size_t frame_size = osr_method->GetFrameSizeInBytes();
+  void** memory = reinterpret_cast<void**>(malloc(frame_size));
+  memset(memory, 0, frame_size);
+
+  // Art ABI: ArtMethod is at the bottom of the stack.
+  memory[0] = method;
+
+  if (!vreg_map.IsValid()) {
+    // If we don't have a dex register map, then there are no live dex registers at
+    // this dex pc.
+  } else {
+    for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+      DexRegisterLocation::Kind location =
+          vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+      if (location == DexRegisterLocation::Kind::kNone) {
+        // Dex register is dead or uninitialized.
+        continue;
+      }
+
+      if (location == DexRegisterLocation::Kind::kConstant) {
+        // We skip constants because the compiled code knows how to handle them.
+        continue;
+      }
+
+      DCHECK(location == DexRegisterLocation::Kind::kInStack);
+
+      int32_t vreg_value = shadow_frame->GetVReg(vreg);
+      int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
+                                                           number_of_vregs,
+                                                           code_info,
+                                                           encoding);
+      DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
+      DCHECK_GT(slot_offset, 0);
+      (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+    }
+  }
+
+  const uint8_t* native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
+  VLOG(jit) << "Jumping to "
+            << PrettyMethod(method)
+            << "@"
+            << std::hex << reinterpret_cast<uintptr_t>(native_pc);
+  {
+    ManagedStack fragment;
+    thread->PushManagedStackFragment(&fragment);
+    (*art_quick_osr_stub)(memory,
+                          frame_size,
+                          native_pc,
+                          result,
+                          method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(),
+                          thread);
+    if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) {
+      thread->DeoptimizeWithDeoptimizationException(result);
+    }
+    thread->PopManagedStackFragment(fragment);
+  }
+  free(memory);
+  thread->PushShadowFrame(shadow_frame);
+  VLOG(jit) << "Done running OSR code for " << PrettyMethod(method);
+  return true;
+}
+
 }  // namespace jit
 }  // namespace art
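
Note on usage: Jit::MaybeDoOnStackReplacement (declared in jit.h below) is meant to be invoked from the interpreter when it takes a branch, so a hot loop can transfer into OSR-compiled code mid-execution. The sketch below shows one possible call site; the hook name and the backward-branch filter are assumptions for illustration, and only the Jit::MaybeDoOnStackReplacement signature comes from this change.

    // Hypothetical interpreter hook (sketch only): try OSR on a loop back-edge.
    // Assumes the declarations added to runtime/jit/jit.h in this patch.
    bool TryOsrOnBranch(art::Thread* self,
                        art::ArtMethod* method,
                        uint32_t dex_pc,
                        int32_t branch_offset,
                        art::JValue* result) {
      if (branch_offset >= 0) {
        return false;  // Only backward branches (loop back-edges) are interesting for OSR.
      }
      // Returns true if compiled code ran and `result` now holds the return value;
      // false means "keep interpreting".
      return art::jit::Jit::MaybeDoOnStackReplacement(
          self, method, dex_pc, branch_offset, result);
    }
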
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a80f51f..042da92 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -49,9 +49,11 @@
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
-  bool CompileMethod(ArtMethod* method, Thread* self)
+  bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
+  void CreateInstrumentationCache(size_t compile_threshold,
+                                  size_t warmup_threshold,
+                                  size_t osr_threshold);
   void CreateThreadPool();
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
@@ -88,6 +90,17 @@
 
   bool JitAtFirstUse();
 
+  // If an OSR compiled version is available for `method`,
+  // and `dex_pc + dex_pc_offset` is an entry point of that compiled
+  // version, this method will jump to the compiled code, let it run,
+  // and return true afterwards. Return false otherwise.
+  static bool MaybeDoOnStackReplacement(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc,
+                                        int32_t dex_pc_offset,
+                                        JValue* result)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -97,7 +110,7 @@
   void* jit_compiler_handle_;
   void* (*jit_load_)(CompilerCallbacks**, bool*);
   void (*jit_unload_)(void*);
-  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
   void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
@@ -123,6 +136,9 @@
   size_t GetWarmupThreshold() const {
     return warmup_threshold_;
   }
+  size_t GetOsrThreshold() const {
+    return osr_threshold_;
+  }
   size_t GetCodeCacheInitialCapacity() const {
     return code_cache_initial_capacity_;
   }
@@ -155,6 +171,7 @@
   size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
+  size_t osr_threshold_;
   bool dump_info_on_shutdown_;
   bool save_profiling_info_;
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index f325949..464c441 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -184,7 +184,8 @@
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
-                                  size_t code_size) {
+                                  size_t code_size,
+                                  bool osr) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        mapping_table,
@@ -194,7 +195,8 @@
                                        core_spill_mask,
                                        fp_spill_mask,
                                        code,
-                                       code_size);
+                                       code_size,
+                                       osr);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
@@ -207,7 +209,8 @@
                                 core_spill_mask,
                                 fp_spill_mask,
                                 code,
-                                code_size);
+                                code_size,
+                                osr);
   }
   return result;
 }
@@ -287,7 +290,8 @@
                                           size_t core_spill_mask,
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
-                                          size_t code_size) {
+                                          size_t code_size,
+                                          bool osr) {
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -329,8 +333,12 @@
   {
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
-    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-        method, method_header->GetEntryPoint());
+    if (osr) {
+      osr_code_map_.Put(method, code_ptr);
+    } else {
+      Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+          method, method_header->GetEntryPoint());
+    }
     if (collection_in_progress_) {
       // We need to update the live bitmap if there is a GC to ensure it sees this new
       // code.
@@ -338,7 +346,7 @@
     }
     last_update_time_ns_.StoreRelease(NanoTime());
     VLOG(jit)
-        << "JIT added "
+        << "JIT added (osr = " << std::boolalpha << osr << std::noboolalpha << ") "
         << PrettyMethod(method) << "@" << method
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
@@ -569,6 +577,10 @@
         info->GetMethod()->SetProfilingInfo(nullptr);
       }
     }
+
+    // Empty the osr method map, as osr compiled code will be deleted (except the ones
+    // on thread stacks).
+    osr_code_map_.clear();
   }
 
   // Run a checkpoint on all threads to mark the JIT compiled code they are running.
@@ -662,6 +674,15 @@
   return method_header;
 }
 
+OatQuickMethodHeader* JitCodeCache::LookupOsrMethodHeader(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  auto it = osr_code_map_.find(method);
+  if (it == osr_code_map_.end()) {
+    return nullptr;
+  }
+  return OatQuickMethodHeader::FromCodePointer(it->second);
+}
+
 ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
                                               ArtMethod* method,
                                               const std::vector<uint32_t>& entries,
@@ -733,12 +754,15 @@
   return last_update_time_ns_.LoadAcquire();
 }
 
-bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
-  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
+  if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
     return false;
   }
 
   MutexLock mu(self, lock_);
+  if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
+    return false;
+  }
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
   if (info == nullptr || info->IsMethodBeingCompiled()) {
     return false;
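
The cache now tracks OSR code separately from regular entry points: CommitCode still records every code pointer in method_code_map_ (so ContainsPc and the GC keep working), but an OSR compilation is recorded only in the new osr_code_map_ and is never installed as the method's quick entry point. A simplified, self-contained model of that bookkeeping, using plain std::map instead of SafeMap and ignoring locking:

    #include <map>

    // Toy model of the two maps (illustration only).
    struct ToyCodeCache {
      std::map<const void*, void*> method_code_map;  // code ptr -> method
      std::map<void*, const void*> osr_code_map;     // method -> OSR code ptr

      void Commit(void* method, const void* code_ptr, bool osr) {
        method_code_map[code_ptr] = method;          // always visible to ContainsPc/GC
        if (osr) {
          osr_code_map[method] = code_ptr;           // reachable only via OSR lookup
        }
        // else: the real cache publishes code_ptr as the method's entry point here.
      }

      const void* LookupOsrCode(void* method) const {
        auto it = osr_code_map.find(method);
        return it == osr_code_map.end() ? nullptr : it->second;
      }
    };
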
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 69fc553..048f8d0 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -71,7 +71,7 @@
   // Number of compilations done throughout the lifetime of the JIT.
   size_t NumberOfCompilations() REQUIRES(!lock_);
 
-  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -89,7 +89,8 @@
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
                       const uint8_t* code,
-                      size_t code_size)
+                      size_t code_size,
+                      bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -131,6 +132,10 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  OatQuickMethodHeader* LookupOsrMethodHeader(ArtMethod* method)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
@@ -187,7 +192,8 @@
                               size_t core_spill_mask,
                               size_t fp_spill_mask,
                               const uint8_t* code,
-                              size_t code_size)
+                              size_t code_size,
+                              bool osr)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -237,8 +243,10 @@
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // This map holds compiled code associated to the ArtMethod.
+  // Holds compiled code associated with the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // Holds osr compiled code associated with the ArtMethod.
+  SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d597b36..a4e40ad 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -29,7 +29,8 @@
  public:
   enum TaskKind {
     kAllocateProfile,
-    kCompile
+    kCompile,
+    kCompileOsr
   };
 
   JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
@@ -48,9 +49,14 @@
     ScopedObjectAccess soa(self);
     if (kind_ == kCompile) {
       VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
         VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
       }
+    } else if (kind_ == kCompileOsr) {
+      VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
+        VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
+      }
     } else {
       DCHECK(kind_ == kAllocateProfile);
       if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
@@ -72,9 +78,11 @@
 };
 
 JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold)
+                                                 size_t warm_method_threshold,
+                                                 size_t osr_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
       warm_method_threshold_(warm_method_threshold),
+      osr_method_threshold_(osr_method_threshold),
       listener_(this) {
 }
 
@@ -151,6 +159,11 @@
     DCHECK(thread_pool_ != nullptr);
     thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
   }
+
+  if (sample_count == osr_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
+    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+  }
 }
 
 JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
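
With the OSR threshold added above, the per-method sample count now drives three actions: allocating a ProfilingInfo at the warm threshold, queueing a regular compile at the hot threshold, and queueing an OSR compile at the (higher) OSR threshold. A self-contained sketch of that cascade, simplified from AddSamples (the real code posts JitCompileTask objects to a thread pool):

    #include <cstddef>
    #include <functional>

    struct ToyThresholds {
      size_t warm;  // allocate profiling info
      size_t hot;   // regular JIT compilation
      size_t osr;   // OSR compilation (set to 2 * the compile threshold, as in jit.cc above)
    };

    void OnSample(size_t sample_count, const ToyThresholds& t,
                  const std::function<void(const char*)>& post_task) {
      if (sample_count == t.warm) post_task("allocate-profile");
      if (sample_count == t.hot)  post_task("compile");
      if (sample_count == t.osr)  post_task("compile-osr");
    }
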
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 06559ad..d1c5c44 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -96,7 +96,9 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
+  JitInstrumentationCache(size_t hot_method_threshold,
+                          size_t warm_method_threshold,
+                          size_t osr_method_threshold);
   void AddSamples(Thread* self, ArtMethod* method, size_t samples)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
@@ -112,6 +114,7 @@
  private:
   size_t hot_method_threshold_;
   size_t warm_method_threshold_;
+  size_t osr_method_threshold_;
   JitInstrumentationListener listener_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b3439f7..c6fa15d 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -370,15 +370,17 @@
   }
 }
 
-template<typename T>
+template<typename T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T PointerArray::GetElementPtrSize(uint32_t idx, size_t ptr_size) {
   // C style casts here since we sometimes have T be a pointer, or sometimes an integer
   // (for stack traces).
   if (ptr_size == 8) {
-    return (T)static_cast<uintptr_t>(AsLongArray()->GetWithoutChecks(idx));
+    return (T)static_cast<uintptr_t>(
+        AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
   DCHECK_EQ(ptr_size, 4u);
-  return (T)static_cast<uintptr_t>(AsIntArray()->GetWithoutChecks(idx));
+  return (T)static_cast<uintptr_t>(
+      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
 }
 
 template<bool kTransactionActive, bool kUnchecked>
@@ -401,12 +403,12 @@
                                                     ptr_size);
 }
 
-template <typename Visitor>
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void PointerArray::Fixup(mirror::PointerArray* dest,
                                 size_t pointer_size,
                                 const Visitor& visitor) {
   for (size_t i = 0, count = GetLength(); i < count; ++i) {
-    void* ptr = GetElementPtrSize<void*>(i, pointer_size);
+    void* ptr = GetElementPtrSize<void*, kVerifyFlags, kReadBarrierOption>(i, pointer_size);
     void* new_ptr = visitor(ptr);
     if (ptr != new_ptr) {
       dest->SetElementPtrSize<false, true>(i, new_ptr, pointer_size);
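
GetElementPtrSize keeps its existing width dispatch: a PointerArray is backed by a LongArray on 64-bit images and an IntArray on 32-bit images, and the new template parameters only thread the verify flags and read-barrier option through to those accessors. A self-contained sketch of the dispatch itself, with a raw buffer standing in for the mirror arrays:

    #include <cstddef>
    #include <cstdint>

    // Simplified model of GetElementPtrSize (illustration only).
    template <typename T>
    T GetElementPtrSizeToy(const void* backing, uint32_t idx, size_t ptr_size) {
      if (ptr_size == 8u) {
        // 64-bit images: pointers are stored in a long array.
        return (T)static_cast<uintptr_t>(static_cast<const uint64_t*>(backing)[idx]);
      }
      // 32-bit images: pointers are stored in an int array.
      return (T)static_cast<uintptr_t>(static_cast<const uint32_t*>(backing)[idx]);
    }
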
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 2bd6c5b..9a21ec2 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -183,7 +183,9 @@
 // Either an IntArray or a LongArray.
 class PointerArray : public Array {
  public:
-  template<typename T>
+  template<typename T,
+           VerifyObjectFlags kVerifyFlags = kVerifyNone,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   T GetElementPtrSize(uint32_t idx, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -196,7 +198,9 @@
 
   // Fixup the pointers in the dest arrays by passing our pointers through the visitor. Only copies
   // to dest if visitor(source_ptr) != source_ptr.
-  template <typename Visitor>
+  template <VerifyObjectFlags kVerifyFlags = kVerifyNone,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void Fixup(mirror::PointerArray* dest, size_t pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 };
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index d5783c0..422832e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -253,14 +253,16 @@
       EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
   return GetFieldPtrWithSize<ArtMethod*>(
       EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
   SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
 }
 
@@ -538,10 +540,11 @@
       : ClassOffset();
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -1057,7 +1060,7 @@
   return arr != nullptr ? arr->size() : 0u;
 }
 
-template <typename Visitor>
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Class::FixupNativePointers(mirror::Class* dest,
                                        size_t pointer_size,
                                        const Visitor& visitor) {
@@ -1085,7 +1088,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable()) {
+  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1094,10 +1097,13 @@
       }
     }
     for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      ArtMethod* method = GetEmbeddedImTableEntry(i, pointer_size);
+      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                                    pointer_size);
       ArtMethod* new_method = visitor(method);
       if (method != new_method) {
-        dest->SetEmbeddedImTableEntry(i, new_method, pointer_size);
+        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                        new_method,
+                                                                        pointer_size);
       }
     }
   }
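
The pattern in this file (and in object-inl.h below) is mechanical: accessors used while fixing up native pointers gain VerifyObjectFlags/ReadBarrierOption template parameters, apparently so callers that walk objects outside normal GC paths can opt out of read barriers. A minimal, self-contained illustration of threading such an option through a getter (not the real ReadBarrier machinery):

    #include <atomic>

    enum ToyReadBarrierOption { kToyWithReadBarrier, kToyWithoutReadBarrier };

    template <ToyReadBarrierOption kOption, typename T>
    T* GetReferenceToy(std::atomic<T*>& slot) {
      T* ref = slot.load(std::memory_order_relaxed);
      if (kOption == kToyWithReadBarrier) {
        // A concurrent collector would mark/forward `ref` here; fixup-style
        // callers instantiate with kToyWithoutReadBarrier and skip this step.
      }
      return ref;
    }
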
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index b97d994..cdc6204 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -800,11 +800,11 @@
   return nullptr;
 }
 
-void Class::SetPreverifiedFlagOnAllMethods(size_t pointer_size) {
+void Class::SetSkipAccessChecksFlagOnAllMethods(size_t pointer_size) {
   DCHECK(IsVerified());
   for (auto& m : GetMethods(pointer_size)) {
     if (!m.IsNative() && m.IsInvokable()) {
-      m.SetPreverified();
+      m.SetSkipAccessChecks();
     }
   }
 }
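
The rename away from kAccPreverified reflects a split in meaning: a class-level flag now records only that verification was attempted, while the method-level kAccSkipAccessChecks flag is set (by SetSkipAccessChecksFlagOnAllMethods above) only on invokable, non-native methods of a verified class. A toy model of the two flags with made-up bit values; the real kAcc* constants are defined elsewhere in the runtime and are not reproduced here:

    #include <cstdint>

    enum : uint32_t {
      kToyVerificationAttempted = 1u << 0,  // class: verifier ran at least once
      kToySkipAccessChecks      = 1u << 1,  // method: access checks proven unnecessary
    };

    struct ToyClass  { uint32_t access_flags = 0; };
    struct ToyMethod { uint32_t access_flags = 0; };

    void MarkVerificationAttempted(ToyClass& klass) {
      klass.access_flags |= kToyVerificationAttempted;
    }

    void MarkVerified(ToyMethod& method, bool is_native, bool is_invokable) {
      if (!is_native && is_invokable) {
        method.access_flags |= kToySkipAccessChecks;
      }
    }
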
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index be5c668..388a231 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -287,14 +287,18 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  // Returns true if the class can avoid access checks.
-  bool IsPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccPreverified) != 0;
+  // Return whether the class had run the verifier at least once.
+  // This does not necessarily mean that access checks are avoidable,
+  // since the class methods might still need to be run with access checks.
+  bool WasVerificationAttempted() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccVerificationAttempted) != 0;
   }
 
-  void SetPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Mark the class as having gone through a verification attempt.
+  // Mutually exclusive from whether or not each method is allowed to skip access checks.
+  void SetVerificationAttempted() SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
-    SetAccessFlags(flags | kAccPreverified);
+    SetAccessFlags(flags | kAccVerificationAttempted);
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -444,7 +448,6 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-
   bool IsArrayClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -489,9 +492,11 @@
     return !IsPrimitive() && !IsInterface() && !IsAbstract() && !IsArrayClass();
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsInstantiable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return (!IsPrimitive() && !IsInterface() && !IsAbstract()) ||
-        (IsAbstract() && IsArrayClass());
+        (IsAbstract() && IsArrayClass<kVerifyFlags, kReadBarrierOption>());
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -561,7 +566,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(size_t pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 69;
+    uint32_t vtable_entries = Object::kVTableLength + 72;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
@@ -810,8 +815,10 @@
     return MemberOffset(sizeof(Class));
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsInstantiable();
+    return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -820,9 +827,13 @@
 
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1015,6 +1026,8 @@
   }
 
   // Get the offset of the first reference static field. Other reference static fields follow.
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceStaticFieldOffset(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1127,8 +1140,8 @@
   void VisitNativeRoots(Visitor& visitor, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // When class is verified, set the kAccPreverified flag on each method.
-  void SetPreverifiedFlagOnAllMethods(size_t pointer_size)
+  // When class is verified, set the kAccSkipAccessChecks flag on each method.
+  void SetSkipAccessChecksFlagOnAllMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the descriptor of the class. In a few cases a std::string is required, rather than
@@ -1238,7 +1251,9 @@
   // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
   // initialized to a copy of *this to prevent issues. Does not visit the ArtMethod and ArtField
   // roots.
-  template <typename Visitor>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void FixupNativePointers(mirror::Class* dest, size_t pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index 605deac..d6571f2 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -43,8 +43,11 @@
     return method_array;
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetMethodArrayCount(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto* method_array = down_cast<PointerArray*>(Get((i * kMax) + kMethodArray));
+    auto* method_array = down_cast<PointerArray*>(
+        Get<kVerifyFlags, kReadBarrierOption>((i * kMax) + kMethodArray));
     return method_array == nullptr ? 0u : method_array->GetLength();
   }
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 760de9a..eb391be 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -255,16 +255,17 @@
   return down_cast<Class*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsObjectArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  return IsArrayInstance<kVerifyFlags>() &&
-      !GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitive();
+  return IsArrayInstance<kVerifyFlags, kReadBarrierOption>() &&
+      !GetClass<kNewFlags, kReadBarrierOption>()->
+          template GetComponentType<kNewFlags, kReadBarrierOption>()->IsPrimitive();
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ObjectArray<T>* Object::AsObjectArray() {
-  DCHECK(IsObjectArray<kVerifyFlags>());
+  DCHECK((IsObjectArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<ObjectArray<T>*>(this);
 }
 
@@ -274,14 +275,14 @@
       template IsArrayClass<kVerifyFlags, kReadBarrierOption>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsReferenceInstance() {
-  return GetClass<kVerifyFlags>()->IsTypeOfReferenceClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsTypeOfReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Reference* Object::AsReference() {
-  DCHECK(IsReferenceInstance<kVerifyFlags>());
+  DCHECK((IsReferenceInstance<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Reference*>(this);
 }
 
@@ -341,29 +342,31 @@
   return down_cast<ShortArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsIntArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveInt<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline IntArray* Object::AsIntArray() {
-  DCHECK(IsIntArray<kVerifyFlags>());
+  DCHECK((IsIntArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<IntArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsLongArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveLong<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline LongArray* Object::AsLongArray() {
-  DCHECK(IsLongArray<kVerifyFlags>());
+  DCHECK((IsLongArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<LongArray*>(this);
 }
 
@@ -1063,7 +1066,7 @@
       // Presumably GC can happen when we are cross compiling, it should not cause performance
       // problems to do pointer size logic.
       MemberOffset field_offset = kIsStatic
-          ? klass->GetFirstReferenceStaticFieldOffset(
+          ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
           : klass->GetFirstReferenceInstanceFieldOffset();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
@@ -1123,26 +1126,26 @@
   visitor(this, ClassOffset(), false);
   const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
   if (LIKELY(class_flags == kClassFlagNormal)) {
-    DCHECK(!klass->IsVariableSize());
-    VisitInstanceFieldsReferences(klass, visitor);
+    DCHECK((!klass->IsVariableSize<kVerifyFlags, kReadBarrierOption>()));
+    VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
     DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
     DCHECK(!klass->IsStringClass());
     DCHECK(!klass->IsClassLoaderClass());
-    DCHECK(!klass->IsArrayClass());
+    DCHECK((!klass->IsArrayClass<kVerifyFlags, kReadBarrierOption>()));
   } else {
     if ((class_flags & kClassFlagNoReferenceFields) == 0) {
       DCHECK(!klass->IsStringClass());
       if (class_flags == kClassFlagClass) {
-        DCHECK(klass->IsClassClass());
-        AsClass<kVerifyNone>()->VisitReferences<kVisitNativeRoots,
-                                                kVerifyFlags,
-                                                kReadBarrierOption>(klass, visitor);
+        DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+        mirror::Class* as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
+        as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
+                                                                                       visitor);
       } else if (class_flags == kClassFlagObjectArray) {
         DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
-        AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences(visitor);
+        AsObjectArray<mirror::Object, kVerifyNone, kReadBarrierOption>()->VisitReferences(visitor);
       } else if ((class_flags & kClassFlagReference) != 0) {
-        VisitInstanceFieldsReferences(klass, visitor);
-        ref_visitor(klass, AsReference());
+        VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+        ref_visitor(klass, AsReference<kVerifyFlags, kReadBarrierOption>());
       } else if (class_flags == kClassFlagDexCache) {
         mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
         dex_cache->VisitReferences<kVisitNativeRoots,
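
The object-inl.h changes above thread ReadBarrierOption through the primitive-array queries so that GC-internal code can inspect an object's type without taking nested read barriers. A minimal sketch of such a call site, assuming a mirror::Object* obj obtained elsewhere (the surrounding visitor is hypothetical; only the template arguments come from this change):

    // Both the type check and the down-cast skip the read barrier.
    if (obj->IsIntArray<kVerifyNone, kWithoutReadBarrier>()) {
      mirror::IntArray* array = obj->AsIntArray<kVerifyNone, kWithoutReadBarrier>();
      // Primitive arrays carry no reference fields, so the payload can be handled as-is.
      DoSomethingWith(array);  // Illustrative placeholder.
    }
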
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index d635002..3f739df 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -159,9 +159,12 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* AsClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<class T,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjectArray<T>* AsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -199,14 +202,18 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ShortArray* AsShortSizedArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   IntArray* AsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   LongArray* AsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -230,9 +237,11 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Throwable* AsThrowable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* AsReference() SHARED_REQUIRES(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsWeakReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index bd4a9c1..12bfe38 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -27,14 +27,6 @@
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
-inline bool Reference::IsEnqueuable() {
-  // Not using volatile reads as an optimization since this is only called with all the mutators
-  // suspended.
-  const Object* queue = GetFieldObject<mirror::Object>(QueueOffset());
-  const Object* queue_next = GetFieldObject<mirror::Object>(QueueNextOffset());
-  return queue != nullptr && queue_next == nullptr;
-}
-
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 5e467ab..3baa12e 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -75,9 +75,7 @@
   void ClearReferent() SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
-  // Volatile read/write is not necessary since the java pending next is only accessed from
-  // the java threads for cleared references. Once these cleared references have a null referent,
-  // we never end up reading their pending next from the GC again.
+
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldObject<Reference>(PendingNextOffset());
   }
@@ -91,14 +89,22 @@
     }
   }
 
-  bool IsEnqueued() SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Since the references are stored as cyclic lists it means that once enqueued, the pending
-    // next is always non-null.
-    return GetPendingNext() != nullptr;
+  // Returns true if the reference's pendingNext is null, indicating it is
+  // okay to process this reference.
+  //
+  // If pendingNext is not null, then one of the following cases holds:
+  // 1. The reference has already been enqueued to a java ReferenceQueue. In
+  // this case the referent should not be considered for reference processing
+  // ever again.
+  // 2. The reference is currently part of a list of references that may
+  // shortly be enqueued on a java ReferenceQueue. In this case the reference
+  // should not be processed again unless and until it has been removed from
+  // that list because it was determined not to be ready for enqueuing on a
+  // java ReferenceQueue.
+  bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetPendingNext() == nullptr;
   }
 
-  bool IsEnqueuable() SHARED_REQUIRES(Locks::mutator_lock_);
-
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetJavaLangRefReference() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(!java_lang_ref_Reference_.IsNull());
@@ -115,9 +121,9 @@
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  HeapReference<Reference> pending_next_;  // Note this is Java volatile:
-  HeapReference<Object> queue_;  // Note this is Java volatile:
-  HeapReference<Reference> queue_next_;  // Note this is Java volatile:
+  HeapReference<Reference> pending_next_;
+  HeapReference<Object> queue_;
+  HeapReference<Reference> queue_next_;
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
   static GcRoot<Class> java_lang_ref_Reference_;
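
The reference.h change replaces the IsEnqueued/IsEnqueuable pair with a single IsUnprocessed() predicate keyed off pendingNext. A hedged sketch of the intended call pattern on the reference-processing side (the loop and the queue object are illustrative, not code from this change):

    // Only a reference whose pendingNext is still null may be picked up for processing;
    // a non-null pendingNext means it is already enqueued, or sitting on a pending list,
    // and must be left alone.
    if (ref->IsUnprocessed()) {
      queue->EnqueueReference(ref);  // Links ref via pendingNext; IsUnprocessed() is now false.
    }
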
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 9946eab..ed4c5fc 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -42,14 +42,16 @@
 
 static constexpr uint32_t kAccJavaFlagsMask = 0xffff;  // bits set from Java sources (low 16)
 
-static constexpr uint32_t kAccConstructor =          0x00010000;  // method (dex only) <(cl)init>
-static constexpr uint32_t kAccDeclaredSynchronized = 0x00020000;  // method (dex only)
-static constexpr uint32_t kAccClassIsProxy =         0x00040000;  // class  (dex only)
-static constexpr uint32_t kAccPreverified =          0x00080000;  // class (runtime),
-                                                                  // method (dex only)
-static constexpr uint32_t kAccFastNative =           0x00080000;  // method (dex only)
-static constexpr uint32_t kAccMiranda =              0x00200000;  // method (dex only)
-static constexpr uint32_t kAccDefault =              0x00400000;  // method (runtime)
+static constexpr uint32_t kAccConstructor =           0x00010000;  // method (dex only) <(cl)init>
+static constexpr uint32_t kAccDeclaredSynchronized =  0x00020000;  // method (dex only)
+static constexpr uint32_t kAccClassIsProxy =          0x00040000;  // class  (dex only)
+// Used by a method to denote that its execution need not go through the slow path interpreter.
+static constexpr uint32_t kAccSkipAccessChecks =      0x00080000;  // method (dex only)
+// Used by a class to denote that the verifier has attempted to check it at least once.
+static constexpr uint32_t kAccVerificationAttempted = 0x00080000;  // class (runtime)
+static constexpr uint32_t kAccFastNative =            0x00080000;  // method (dex only)
+static constexpr uint32_t kAccMiranda =               0x00200000;  // method (dex only)
+static constexpr uint32_t kAccDefault =               0x00400000;  // method (runtime)
 // This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
 // if any particular method needs to be a default conflict. Used to figure out at runtime if
 // invoking this method will throw an exception.
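
Note that the bit value 0x00080000 is now deliberately shared by three flags that apply to different kinds of entities: kAccSkipAccessChecks (dex methods), kAccVerificationAttempted (runtime classes) and kAccFastNative (native dex methods). An illustrative compile-time check of that invariant (kSharedBit is a made-up name, not part of the change):

    static constexpr uint32_t kSharedBit = 0x00080000;
    static_assert(kAccSkipAccessChecks == kSharedBit, "method (dex) flag");
    static_assert(kAccVerificationAttempted == kSharedBit, "class (runtime) flag");
    static_assert(kAccFastNative == kSharedBit, "native method flag");
    // Which meaning applies depends entirely on whether the bit is read from a
    // method's or a class's access flags.
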
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 0ddd4a2..a80585a 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -469,14 +469,21 @@
   return soa.AddLocalReference<jobjectArray>(ret.Get());
 }
 
-static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationType) {
+static jobject Class_getDeclaredAnnotation(JNIEnv* env, jobject javaThis, jclass annotationClass) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+
+  // Honor the public API contract: throw a NullPointerException if "annotationClass" is null.
+  if (UNLIKELY(annotationClass == nullptr)) {
+    ThrowNullPointerException("annotationClass");
+    return nullptr;
+  }
+
   if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
-  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  Handle<mirror::Class> annotation_class(hs.NewHandle(soa.Decode<mirror::Class*>(annotationClass)));
   return soa.AddLocalReference<jobject>(
       klass->GetDexFile().GetAnnotationForClass(klass, annotation_class));
 }
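
The null check is placed before the proxy/DexCache early return, so proxy classes also throw instead of silently returning null. A hypothetical JNI-level probe of the new behaviour (env, clazz and the cached jmethodID are assumed to exist already; this is not a test added by the change):

    jobject result = env->CallObjectMethod(clazz, get_declared_annotation_mid,
                                           static_cast<jobject>(nullptr));
    CHECK(env->ExceptionCheck());  // A NullPointerException("annotationClass") is now pending.
    env->ExceptionClear();
    (void) result;  // Unused; the call is expected to fail.
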
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index de90f0a..e76e443 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -24,6 +24,7 @@
 #include "base/stl_util.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
 #include "mirror/class_loader.h"
@@ -379,6 +380,9 @@
           // spaces array.
           {
             ScopedThreadSuspension sts(self, kSuspended);
+            gc::ScopedGCCriticalSection gcs(self,
+                                            gc::kGcCauseAddRemoveAppImageSpace,
+                                            gc::kCollectorTypeAddRemoveAppImageSpace);
             ScopedSuspendAll ssa("Add image space");
             runtime->GetHeap()->AddSpace(image_space.get());
           }
@@ -393,6 +397,9 @@
             dex_files.clear();
             {
               ScopedThreadSuspension sts(self, kSuspended);
+              gc::ScopedGCCriticalSection gcs(self,
+                                              gc::kGcCauseAddRemoveAppImageSpace,
+                                              gc::kCollectorTypeAddRemoveAppImageSpace);
               ScopedSuspendAll ssa("Remove image space");
               runtime->GetHeap()->RemoveSpace(image_space.get());
             }
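
Both hunks above follow the same nesting order: the thread first drops to kSuspended, then enters the GC critical section, and only then stops the world. A condensed sketch of that pattern (AddSpace stands in for whichever heap mutation is being protected):

    {
      ScopedThreadSuspension sts(self, kSuspended);    // 1. Leave the runnable state first.
      gc::ScopedGCCriticalSection gcs(self,
                                      gc::kGcCauseAddRemoveAppImageSpace,
                                      gc::kCollectorTypeAddRemoveAppImageSpace);  // 2. Keep GC out.
      ScopedSuspendAll ssa("Add image space");         // 3. Stop the world.
      runtime->GetHeap()->AddSpace(image_space.get()); // 4. Mutate the heap's space list.
    }  // Destructors undo the three scopes in reverse order.
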
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 5643739..2b7eca2 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -108,7 +108,7 @@
   }
 
   template <bool kCheckFrameSize = true>
-  uint32_t GetFrameSizeInBytes() {
+  uint32_t GetFrameSizeInBytes() const {
     uint32_t result = frame_info_.FrameSizeInBytes();
     if (kCheckFrameSize) {
       DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index aa64ee3..2ea4b14 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -277,6 +277,8 @@
           .WithType<ExperimentalFlags>()
           .AppendValues()
           .IntoKey(M::Experimental)
+      .Define("-Xforce-nb-testing")
+          .IntoKey(M::ForceNativeBridge)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 17306c9..6b84c8f 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -108,7 +108,7 @@
 
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      if (method != nullptr) {
+      if (result != nullptr) {
         result->opcode = kInlineOpNop;
         result->flags = kInlineSpecial;
         result->d.data = 0u;
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 19cf759..0c3eb3b 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -32,50 +32,61 @@
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
-    // is used to create artificial data dependency from the is_gray
-    // load to the ref field (ptr) load to avoid needing a load-load
-    // barrier between the two.
-    uintptr_t rb_ptr_high_bits;
-    bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
-    ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-        rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    MirrorType* old_ref = ref;
-    if (is_gray) {
-      // Slow-path.
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-      // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
-      // updates before us, but it's ok.
-      if (kAlwaysUpdateField && ref != old_ref) {
-        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
-            offset, old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    if (kEnableReadBarrierInvariantChecks) {
-      CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-    }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref_addr->AsMirrorPtr();
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    MirrorType* old_ref = ref;
-    // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
-            offset, old_ref, ref);
+    if (kUseBakerReadBarrier) {
+      // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
+      // are used to create an artificial data dependency from the is_gray
+      // load to the ref field (ptr) load to avoid needing a load-load
+      // barrier between the two.
+      uintptr_t rb_ptr_high_bits;
+      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
+          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      if (is_gray) {
+        // Slow-path.
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+        // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
+        // updates before us, but it's ok.
+        if (kAlwaysUpdateField && ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
       }
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref_addr->AsMirrorPtr();
+    } else if (kUseTableLookupReadBarrier) {
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
+      gc::Heap* heap = Runtime::Current()->GetHeap();
+      if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
     }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
   } else {
     // No read barrier.
     return ref_addr->AsMirrorPtr();
@@ -87,32 +98,43 @@
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = *root;
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // TODO: separate the read barrier code from the collector code more.
-    Thread* self = Thread::Current();
-    if (self != nullptr && self->GetIsGcMarking()) {
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-    }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref;
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    Thread* self = Thread::Current();
-    if (self != nullptr &&
-        self->GetIsGcMarking() &&
-        Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
-      MirrorType* old_ref = ref;
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-        atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
+    if (kUseBakerReadBarrier) {
+      // TODO: separate the read barrier code from the collector code more.
+      Thread* self = Thread::Current();
+      if (self != nullptr && self->GetIsGcMarking()) {
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref;
+    } else if (kUseTableLookupReadBarrier) {
+      Thread* self = Thread::Current();
+      if (self != nullptr &&
+          self->GetIsGcMarking() &&
+          Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+        MirrorType* old_ref = ref;
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+          atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
+    }
   } else {
     return ref;
   }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0c06ca6..1b59c6f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -602,9 +602,12 @@
     if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
+    NativeBridgeAction action = force_native_bridge_
+        ? NativeBridgeAction::kInitialize
+        : NativeBridgeAction::kUnload;
     InitNonZygoteOrPostFork(self->GetJniEnv(),
                             /* is_system_server */ false,
-                            NativeBridgeAction::kInitialize,
+                            action,
                             GetInstructionSetString(kRuntimeISA));
   }
 
@@ -939,6 +942,7 @@
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
+  force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
   Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
 
@@ -1883,7 +1887,8 @@
   if (jit_.get() != nullptr) {
     compiler_callbacks_ = jit_->GetCompilerCallbacks();
     jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
-                                     jit_options_->GetWarmupThreshold());
+                                     jit_options_->GetWarmupThreshold(),
+                                     jit_options_->GetOsrThreshold());
     jit_->CreateThreadPool();
 
     // Notify native debugger about the classes already loaded before the creation of the jit.
@@ -1914,7 +1919,8 @@
 }
 
 bool Runtime::IsVerificationEnabled() const {
-  return verify_ == verifier::VerifyMode::kEnable;
+  return verify_ == verifier::VerifyMode::kEnable ||
+      verify_ == verifier::VerifyMode::kSoftFail;
 }
 
 bool Runtime::IsVerificationSoftFail() const {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index c8c2ee5..bec26f8 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -774,6 +774,9 @@
   // building a statically link version of dex2oat.
   bool no_sig_chain_;
 
+  // Force the use of native bridge even if the app ISA matches the runtime ISA.
+  bool force_native_bridge_;
+
   // Whether or not a native bridge has been loaded.
   //
   // The native bridge allows running native code compiled for a foreign ISA. The way it works is,
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 308f3ba..097bccb 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -92,6 +92,7 @@
 
 RUNTIME_OPTIONS_KEY (Unit,                DisableExplicitGC)
 RUNTIME_OPTIONS_KEY (Unit,                NoSigChain)
+RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 84185ce..97eb805 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -1195,6 +1195,35 @@
     return StackMap();
   }
 
+  StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
+    size_t e = GetNumberOfStackMaps();
+    if (e == 0) {
+      // There cannot be an OSR stack map if there are no stack maps.
+      return StackMap();
+    }
+    // Walk over all stack maps. If two consecutive stack maps are identical, then we
+    // have found a stack map suitable for OSR.
+    for (size_t i = 0; i < e - 1; ++i) {
+      StackMap stack_map = GetStackMapAt(i, encoding);
+      if (stack_map.GetDexPc(encoding) == dex_pc) {
+        StackMap other = GetStackMapAt(i + 1, encoding);
+        if (other.GetDexPc(encoding) == dex_pc &&
+            other.GetNativePcOffset(encoding) == stack_map.GetNativePcOffset(encoding)) {
+          DCHECK_EQ(other.GetDexRegisterMapOffset(encoding),
+                    stack_map.GetDexRegisterMapOffset(encoding));
+          DCHECK(!stack_map.HasInlineInfo(encoding));
+          if (i < e - 2) {
+            // Make sure there are not three identical stack maps following each other.
+            DCHECK_NE(stack_map.GetNativePcOffset(encoding),
+                      GetStackMapAt(i + 2, encoding).GetNativePcOffset(encoding));
+          }
+          return stack_map;
+        }
+      }
+    }
+    return StackMap();
+  }
+
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
                                         const StackMapEncoding& encoding) const {
     // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
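
GetOsrStackMapForDexPc relies on the compiler emitting two back-to-back stack maps with identical dex pc and native pc offset at an OSR entry. A worked illustration of the lookup over a hypothetical method's stack maps (the numbers are made up):

    // index:       0           1           2           3
    // stack map:  (0, 0x00)   (7, 0x20)   (7, 0x20)   (9, 0x48)   // (dex_pc, native_pc_offset)
    //
    // GetOsrStackMapForDexPc(7): entries 1 and 2 agree on both dex pc and native pc offset,
    // so the map at index 1 is returned as the OSR entry.
    // GetOsrStackMapForDexPc(9): no duplicated pair exists, so an invalid StackMap() is returned.
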
diff --git a/runtime/thread.cc b/runtime/thread.cc
index e47fdbc..7a45594 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2704,7 +2704,7 @@
   // Visiting the declaring class is necessary so that we don't unload the class of a method that
   // is executing. We need to ensure that the code stays mapped.
   void VisitDeclaringClass(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::Class* klass = method->GetDeclaringClassNoBarrier();
+    mirror::Class* klass = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
     // klass can be null for runtime methods.
     if (klass != nullptr) {
       mirror::Object* new_ref = klass;
@@ -3012,4 +3012,25 @@
   return count;
 }
 
+
+void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
+  DCHECK_EQ(GetException(), Thread::GetDeoptimizationException());
+  ClearException();
+  ShadowFrame* shadow_frame =
+      PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
+  mirror::Throwable* pending_exception = nullptr;
+  bool from_code = false;
+  PopDeoptimizationContext(result, &pending_exception, &from_code);
+  CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
+  SetTopOfStack(nullptr);
+  SetTopOfShadowStack(shadow_frame);
+
+  // Restore the exception that was pending before deoptimization then interpret the
+  // deoptimized frames.
+  if (pending_exception != nullptr) {
+    SetException(pending_exception);
+  }
+  interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 96919a4..3a5d72e 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -553,6 +553,9 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
   }
 
+  // Deoptimize the Java stack.
+  void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
@@ -1068,6 +1071,14 @@
 
   void InitStringEntryPoints();
 
+  void ModifyDebugDisallowReadBarrier(int8_t delta) {
+    debug_disallow_read_barrier_ += delta;
+  }
+
+  uint8_t GetDebugDisallowReadBarrierCount() const {
+    return debug_disallow_read_barrier_;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1447,6 +1458,9 @@
   // Thread "interrupted" status; stays raised until queried or thrown.
   bool interrupted_ GUARDED_BY(wait_mutex_);
 
+  // Debug read barrier disallow count; only checked in debug builds and only in the runtime.
+  uint8_t debug_disallow_read_barrier_ = 0;
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
@@ -1494,6 +1508,20 @@
   DISALLOW_COPY_AND_ASSIGN(ScopedStackedShadowFramePusher);
 };
 
+// Only works for debug builds.
+class ScopedDebugDisallowReadBarriers {
+ public:
+  explicit ScopedDebugDisallowReadBarriers(Thread* self) : self_(self) {
+    self_->ModifyDebugDisallowReadBarrier(1);
+  }
+  ~ScopedDebugDisallowReadBarriers() {
+    self_->ModifyDebugDisallowReadBarrier(-1);
+  }
+
+ private:
+  Thread* const self_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
 
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index f6ee6a2..6922564 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -31,14 +31,14 @@
                                                   const DexFile::Header& header)
     : pointer_size_(pointer_size),
       /* types_offset_ is always 0u, so it's constexpr */
-      methods_offset_(types_offset_ +
-                      RoundUp(TypesSize(header.type_ids_size_), MethodsAlignment())),
-      strings_offset_(methods_offset_ +
-                      RoundUp(MethodsSize(header.method_ids_size_), StringsAlignment())),
-      fields_offset_(strings_offset_ +
-                     RoundUp(StringsSize(header.string_ids_size_), FieldsAlignment())),
-      size_(fields_offset_ +
-            RoundUp(FieldsSize(header.field_ids_size_), Alignment())) {
+      methods_offset_(
+          RoundUp(types_offset_ + TypesSize(header.type_ids_size_), MethodsAlignment())),
+      strings_offset_(
+          RoundUp(methods_offset_ + MethodsSize(header.method_ids_size_), StringsAlignment())),
+      fields_offset_(
+          RoundUp(strings_offset_ + StringsSize(header.string_ids_size_), FieldsAlignment())),
+      size_(
+          RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), Alignment())) {
   DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
 }
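
The constructor change rounds each running offset up to the next array's alignment instead of rounding only the previous array's size, which left the offset's own misalignment uncorrected. A small numeric illustration (the figures are made up, not taken from a real dex file):

    // Old form: offset_next = offset_prev + RoundUp(size_prev, align_next)
    //   e.g. offset_prev = 4, size_prev = 12, align_next = 8
    //        -> 4 + RoundUp(12, 8) = 4 + 16 = 20   (not 8-aligned)
    // New form: offset_next = RoundUp(offset_prev + size_prev, align_next)
    //        -> RoundUp(4 + 12, 8) = 16            (8-aligned, as required)
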
 
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
index 2d0b4cc..3109ecc 100644
--- a/test/048-reflect-v8/expected.txt
+++ b/test/048-reflect-v8/expected.txt
@@ -1,4 +1,95 @@
-Main$DefaultInterface is default = yes
-Main$RegularInterface is default = no
-Main$ImplementsWithDefault is default = yes
-Main$ImplementsWithRegular is default = no
+==============================
+Are These Methods Default:
+==============================
+IsDefaultTest$DefaultInterface is default = yes
+IsDefaultTest$RegularInterface is default = no
+IsDefaultTest$ImplementsWithDefault is default = yes
+IsDefaultTest$ImplementsWithRegular is default = no
+==============================
+Class annotations by type:
+==============================
+Annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Annotations by type, defined by class UserSub with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by class UserSub with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by class UserSub2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Class declared annotation:
+==============================
+Declared annotations by class class SingleUser, annotation interface Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by class class SingleUser, annotation interface Calendars: <null>
+Declared annotations by class class User, annotation interface Calendar: <null>
+Declared annotations by class class User, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by class class UserComplex, annotation interface Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserComplex, annotation interface Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by class class UserSub, annotation interface Calendar: <null>
+Declared annotations by class class UserSub, annotation interface Calendars: <null>
+Declared annotations by class class UserSub2, annotation interface Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by class class UserSub2, annotation interface Calendars: <null>
+-----------------------------
+-----------------------------
+==============================
+Declared class annotations by type:
+==============================
+Declared annotations by type, defined by class SingleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Declared annotations by type, defined by class SingleUser with annotation Calendars: <empty>
+Declared annotations by type, defined by class User with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Declared annotations by type, defined by class User with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Declared annotations by type, defined by class User2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class User2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Declared annotations by type, defined by class UserComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Declared annotations by type, defined by class UserComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+Declared annotations by type, defined by class UserSub with annotation Calendar: <empty>
+Declared annotations by type, defined by class UserSub with annotation Calendars: <empty>
+Declared annotations by type, defined by class UserSub2 with annotation Calendar: @Calendar(dayOfMonth=sub2, dayOfWeek=unspecified_week, hour=6)
+Declared annotations by type, defined by class UserSub2 with annotation Calendars: <empty>
+-----------------------------
+-----------------------------
+==============================
+Method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations:
+==============================
+Annotations declared by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations declared by method singleUser with annotation Calendars: <null>
+Annotations declared by method user with annotation Calendar: <null>
+Annotations declared by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations declared by method user2 with annotation Calendar: <null>
+Annotations declared by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations declared by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6)
+Annotations declared by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
+==============================
+Declared method annotations by type:
+==============================
+Annotations by type, defined by method singleUser with annotation Calendar: @Calendar(dayOfMonth=unspecified_month, dayOfWeek=single, hour=23)
+Annotations by type, defined by method singleUser with annotation Calendars: <empty>
+Annotations by type, defined by method user with annotation Calendar: @Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)
+Annotations by type, defined by method user with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=last, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=unspecified_month, dayOfWeek=Fri, hour=23)])
+Annotations by type, defined by method user2 with annotation Calendar: @Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)
+Annotations by type, defined by method user2 with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=z, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=x, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=y, dayOfWeek=unspecified_week, hour=6)])
+Annotations by type, defined by method userComplex with annotation Calendar: @Calendar(dayOfMonth=afirst, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)
+Annotations by type, defined by method userComplex with annotation Calendars: @Calendars(value=[@Calendar(dayOfMonth=zsecond, dayOfWeek=unspecified_week, hour=6), @Calendar(dayOfMonth=athird, dayOfWeek=unspecified_week, hour=23)])
+-----------------------------
+-----------------------------
diff --git a/test/048-reflect-v8/src/AnnotationTest.java b/test/048-reflect-v8/src/AnnotationTest.java
new file mode 100644
index 0000000..75e6845
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTest.java
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+
+public class AnnotationTest extends AnnotationTestHelpers {
+  public static void testAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Class annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element
+    //   AND E is a class AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively only if there is no result at each level,
+    // and the annotation is @Inherited).
+    printAnnotationsByType(Calendar.class, SingleUser.class);
+    printAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printAnnotationsByType(Calendar.class, User.class);
+    printAnnotationsByType(Calendars.class, User.class);
+
+    printAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printAnnotationsByType(Calendars.class, User2.class);
+
+    // NOTE:
+    //    Order of outer-most annotations Calendars[C,C],S vs C,Calendars[C,C] is unspecified.
+    //    In particular, the order of #getDeclaredAnnotations is left completely unspecified.
+    //    The only requirement for #getAnnotationsByType is to have the same ordering as
+    //    #getDeclaredAnnotations.
+    //    (Calendars[] itself has to maintain value() order).
+    printAnnotationsByType(Calendar.class, UserComplex.class);  // Cs(C,C),C collapses into C,C,C.
+    printAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printAnnotationsByType(Calendar.class, UserSub.class);
+    printAnnotationsByType(Calendars.class, UserSub.class);
+
+    printAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present
+    // "Calendar" annotations coming from User.
+    printAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+
+  }
+
+  public static void testDeclaredAnnotation() {
+    System.out.println("==============================");
+    System.out.println("Class declared annotation:");
+    System.out.println("==============================");
+
+    // Print directly present annotations:
+    //
+    // The element E has an annotation_item for it (accessible through an
+    // annotations_directory_item) corresponding to an annotation A,
+    // and A's type_idx must match that on the encoded_annotation (from the annotation_item).
+    // (Does not look through the subtypes recursively)
+    printDeclaredAnnotation(SingleUser.class, Calendar.class);
+    printDeclaredAnnotation(SingleUser.class, Calendars.class);
+
+    printDeclaredAnnotation(User.class, Calendar.class);
+    printDeclaredAnnotation(User.class, Calendars.class);
+
+    printDeclaredAnnotation(UserComplex.class, Calendar.class);
+    printDeclaredAnnotation(UserComplex.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub.class, Calendar.class);
+    printDeclaredAnnotation(UserSub.class, Calendars.class);
+
+    printDeclaredAnnotation(UserSub2.class, Calendar.class);
+    printDeclaredAnnotation(UserSub2.class, Calendars.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  public static void testDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared class annotations by type:");
+    System.out.println("==============================");
+
+    // A is directly present or repeatably present on an element E;
+    // -- (does not do any recursion for classes regardless of @Inherited)
+    printDeclaredAnnotationsByType(Calendar.class, SingleUser.class);
+    printDeclaredAnnotationsByType(Calendars.class, SingleUser.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User.class);
+    printDeclaredAnnotationsByType(Calendars.class, User.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, User2.class);  // Enforce ordering 'z,x,y'
+    printDeclaredAnnotationsByType(Calendars.class, User2.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserComplex.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserComplex.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub.class);
+    printDeclaredAnnotationsByType(Calendars.class, UserSub.class);
+
+    printDeclaredAnnotationsByType(Calendar.class, UserSub2.class);
+    // The directly present "Calendar" annotation masks all the repeatably present "Calendar"
+    // annotations coming from User.
+    printDeclaredAnnotationsByType(Calendars.class, UserSub2.class);
+    // Edge case: UserSub2 doesn't directly have a Calendars annotation,
+    // so it doesn't mask the "User" Calendars annotation.
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass."
+  private static <A extends Annotation> void printAnnotationsByType(Class<A> annotationClass,
+      Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by class "
+        + annotationUseClass.getName() + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+
+    System.out.println(msg);
+  }
+
+  private static <A extends Annotation> void printDeclaredAnnotation(Class<?> annotationUseClass,
+      Class<A> annotationDefClass) {
+    A anno = annotationUseClass.getDeclaredAnnotation(annotationDefClass);
+
+    String msg = asString(anno);
+
+    System.out.println("Declared annotations by class " + annotationUseClass
+        + ", annotation " + annotationDefClass + ": " + msg);
+  }
+
+  // Print the annotation "annotationClass" that is directly/indirectly present with an element
+  // denoted by "annotationUseClass."
+  private static <A extends Annotation> void printDeclaredAnnotationsByType(
+      Class<A> annotationClass, Class<?> annotationUseClass) {
+    A[] annotationsByType = annotationUseClass.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Declared annnotations by type, defined by class " + annotationUseClass.getName()
+        + " with annotation " + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Method annotations by type:");
+    System.out.println("==============================");
+
+    // Print associated annotations:
+    // * A is directly present or repeatably present on an element E;
+    // * No annotation of A is directly/repeatably present on an element AND E is a class
+    //   AND A's type is inheritable, AND A is associated with its superclass.
+    // (Looks through superclasses recursively only if there is no result at each level,
+    // and the annotation is @Inherited).
+    printMethodAnnotationsByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodAnnotationsByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodAnnotationsByType(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // "annotationUseClass" method methodName.
+  private static <A extends Annotation> void printMethodAnnotationsByType(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation " +
+      annotationClass.getName() + ": " +
+      asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotations() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotation(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotation(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotation(Calendars.class, "userComplex", AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotation(Class<A> annotationClass,
+      String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    Annotation annotationsByType = m.getDeclaredAnnotation(annotationClass);
+
+    String msg = "Annotations declared by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+
+  public static void testMethodDeclaredAnnotationsByType() {
+    System.out.println("==============================");
+    System.out.println("Declared method annotations by type:");
+    System.out.println("==============================");
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "singleUser", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "singleUser", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "user2", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "user2", AnnotationTestFixture.class);
+
+    printMethodDeclaredAnnotationByType(Calendar.class, "userComplex", AnnotationTestFixture.class);
+    printMethodDeclaredAnnotationByType(Calendars.class, "userComplex",
+        AnnotationTestFixture.class);
+
+    System.out.println("-----------------------------");
+    System.out.println("-----------------------------");
+  }
+
+  // Print the annotation "annotationClass" that is associated with an element denoted by
+  // methodName in annotationUseClass.
+  private static <A extends Annotation> void printMethodDeclaredAnnotationByType(
+      Class<A> annotationClass, String methodName, Class<?> annotationUseClass) {
+    Method m = null;
+    try {
+      m = annotationUseClass.getDeclaredMethod(methodName);
+    } catch (Throwable t) {
+      throw new AssertionError(t);
+    }
+    A[] annotationsByType = m.getDeclaredAnnotationsByType(annotationClass);
+
+    String msg = "Annotations by type, defined by method " + m.getName() + " with annotation "
+        + annotationClass.getName() + ": "
+        + asString(annotationsByType);
+
+    System.out.println(msg);
+  }
+}
diff --git a/test/048-reflect-v8/src/AnnotationTestFixture.java b/test/048-reflect-v8/src/AnnotationTestFixture.java
new file mode 100644
index 0000000..248dfac
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestFixture.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class AnnotationTestFixture {
+
+  @Calendar(dayOfWeek="single", hour=23)
+  public static void singleUser() {
+
+  }
+  @Calendars ({
+    @Calendar(dayOfMonth="last"),
+    @Calendar(dayOfWeek="Fri", hour=23)
+  })
+  public static void user() {
+
+  }
+
+  @Calendars ({
+    @Calendar(dayOfMonth="z"),
+    @Calendar(dayOfMonth="x"),
+    @Calendar(dayOfMonth="y")
+  })
+  public static void user2() {
+
+  }
+
+  @Calendar(dayOfMonth="afirst")
+  @Calendars ({
+    @Calendar(dayOfMonth="zsecond"),
+    @Calendar(dayOfMonth="athird", hour=23)
+  })
+  public static void userComplex() {
+
+  }
+}
diff --git a/test/048-reflect-v8/src/AnnotationTestHelpers.java b/test/048-reflect-v8/src/AnnotationTestHelpers.java
new file mode 100644
index 0000000..6b5bea2
--- /dev/null
+++ b/test/048-reflect-v8/src/AnnotationTestHelpers.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Annotation;
+
+public class AnnotationTestHelpers {
+  // Provide custom print functions that produce deterministic output.
+  // Note that Annotation#toString prints the fields in an unspecified order,
+  // which is why we can't rely on it.
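+  //
+  // For example (illustrative only): toString() on a @Calendar(dayOfWeek="single", hour=23)
+  // instance may legally print its members in any order, whereas asString() below always
+  // emits them in a fixed (dayOfMonth, dayOfWeek, hour) order.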
+
+  public static String asString(Annotation anno) {
+    if (anno instanceof Calendar) {
+      return asString((Calendar)anno);
+    } else if (anno instanceof Calendars) {
+      return asString((Calendars)anno);
+    } else {
+      if (anno == null) {
+        return "<null>";
+      }
+      // Fall-back; we would usually only get here on a test failure.
+      return anno.toString();
+    }
+  }
+
+  public static String asString(Annotation[] annos) {
+    String msg = "";
+
+    if (annos == null) {
+      msg += "<null>";
+    } else if (annos.length == 0) {
+      msg += "<empty>";
+    } else {
+      for (int i = 0; i < annos.length; ++i) {
+        msg += asString(annos[i]);
+
+        if (i != annos.length - 1) {
+          msg += ", ";
+        }
+      }
+    }
+
+    return msg;
+  }
+
+  public static String asString(Calendar calendar) {
+    if (calendar == null) {
+      return "<null>";
+    }
+
+    return "@Calendar(dayOfMonth=" + calendar.dayOfMonth() + ", dayOfWeek=" +
+      calendar.dayOfWeek() + ", hour=" + calendar.hour() + ")";
+  }
+
+  public static String asString(Calendars calendars) {
+    if (calendars == null) {
+      return "<null>";
+    }
+
+    String s = "@Calendars(value=[";
+
+    Calendar[] allValues = calendars.value();
+    for (int i = 0; i < allValues.length; ++i) {
+      s += asString(allValues[i]);
+      if (i != allValues.length - 1) {
+        s += ", ";
+      }
+    }
+
+    s += "])";
+
+    return s;
+  }
+}
diff --git a/test/048-reflect-v8/src/Calendar.java b/test/048-reflect-v8/src/Calendar.java
new file mode 100644
index 0000000..4a16573
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendar.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Repeatable;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// This is a plain old non-1.8 annotation. At runtime we can see that it has a
+// "Repeatable" annotation if we query it with getDeclaredAnnotation(Repeatable.class).
+@Retention(RetentionPolicy.RUNTIME)
+@Repeatable(Calendars.class)
+@Inherited  // note: container must also be @Inherited by JLS.
+public @interface Calendar {
+    String dayOfMonth() default "unspecified_month";
+    String dayOfWeek() default "unspecified_week";
+    int hour() default 6;
+}
+
diff --git a/test/048-reflect-v8/src/Calendars.java b/test/048-reflect-v8/src/Calendars.java
new file mode 100644
index 0000000..caeda52
--- /dev/null
+++ b/test/048-reflect-v8/src/Calendars.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+// Plain old annotation; there's nothing 1.8-specific about it.
+@Retention(RetentionPolicy.RUNTIME)
+@Inherited  // note: elements must also be @Inherited by JLS.
+public @interface Calendars {
+  Calendar[] value();
+}
diff --git a/test/048-reflect-v8/src/IFaceA.java b/test/048-reflect-v8/src/IFaceA.java
new file mode 100644
index 0000000..9b1f610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceA.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+@Calendars ({
+  @Calendar(dayOfMonth="if_a_first"),
+  @Calendar(dayOfMonth="if_b_last")
+})
+public interface IFaceA {
+}
diff --git a/test/048-reflect-v8/src/IFaceSimple.java b/test/048-reflect-v8/src/IFaceSimple.java
new file mode 100644
index 0000000..93cf610
--- /dev/null
+++ b/test/048-reflect-v8/src/IFaceSimple.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple annotation, no container.
+@Calendar(dayOfMonth="if_simple_first")
+public interface IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/IsDefaultTest.java b/test/048-reflect-v8/src/IsDefaultTest.java
new file mode 100644
index 0000000..177dcf1
--- /dev/null
+++ b/test/048-reflect-v8/src/IsDefaultTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class IsDefaultTest {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);
+      return;
+    }
+
+    boolean isDefault = m.isDefault();
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void test() {
+    System.out.println("==============================");
+    System.out.println("Are These Methods Default:");
+    System.out.println("==============================");
+
+    printIsDefault(DefaultInterface.class);
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
index 7fa2a92..f2b8287 100644
--- a/test/048-reflect-v8/src/Main.java
+++ b/test/048-reflect-v8/src/Main.java
@@ -14,43 +14,14 @@
  * limitations under the License.
  */
 
-import java.lang.reflect.Method;
-
 public class Main {
-  interface DefaultInterface {
-    default void sayHi() {
-      System.out.println("hi default");
-    }
-  }
-
-  interface RegularInterface {
-    void sayHi();
-  }
-
-  class ImplementsWithDefault implements DefaultInterface {}
-  class ImplementsWithRegular implements RegularInterface {
-    public void sayHi() {
-      System.out.println("hello specific");
-    }
-  }
-
-  private static void printIsDefault(Class<?> klass) {
-    Method m;
-    try {
-      m = klass.getMethod("sayHi");
-    } catch (Throwable t) {
-      System.out.println(t);
-      return;
-    }
-
-    boolean isDefault = m.isDefault();
-    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
-  }
-
   public static void main(String[] args) {
-    printIsDefault(DefaultInterface.class);
-    printIsDefault(RegularInterface.class);
-    printIsDefault(ImplementsWithDefault.class);
-    printIsDefault(ImplementsWithRegular.class);
+    IsDefaultTest.test();
+    AnnotationTest.testAnnotationsByType();
+    AnnotationTest.testDeclaredAnnotation();
+    AnnotationTest.testDeclaredAnnotationsByType();
+    AnnotationTest.testMethodAnnotationsByType();
+    AnnotationTest.testMethodDeclaredAnnotations();
+    AnnotationTest.testMethodDeclaredAnnotationsByType();
   }
 }
diff --git a/test/048-reflect-v8/src/SingleUser.java b/test/048-reflect-v8/src/SingleUser.java
new file mode 100644
index 0000000..0f9c430
--- /dev/null
+++ b/test/048-reflect-v8/src/SingleUser.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a single "Calendar" annotation in the binary.
+@Calendar(dayOfWeek="single", hour=23)
+public class SingleUser {
+
+}
diff --git a/test/048-reflect-v8/src/User.java b/test/048-reflect-v8/src/User.java
new file mode 100644
index 0000000..003ceeb
--- /dev/null
+++ b/test/048-reflect-v8/src/User.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// in the binary.
+//
+/* FIXME: Use this code instead, when Jack supports repeatable annotations properly.
+ *
+ * @Calendar(dayOfMonth="last")
+ * @Calendar(dayOfWeek="Fri", hour=23)
+ */
+@Calendars ({
+  @Calendar(dayOfMonth="last"),
+  @Calendar(dayOfWeek="Fri", hour=23)
+})
+public class User {
+
+}
diff --git a/test/048-reflect-v8/src/User2.java b/test/048-reflect-v8/src/User2.java
new file mode 100644
index 0000000..1a6049f
--- /dev/null
+++ b/test/048-reflect-v8/src/User2.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar,Calendar)
+// in the binary.
+// (Check for order, should be z,x,y)
+@Calendars ({
+  @Calendar(dayOfMonth="z"),
+  @Calendar(dayOfMonth="x"),
+  @Calendar(dayOfMonth="y")
+})
+public class User2 {
+
+}
diff --git a/test/048-reflect-v8/src/UserComplex.java b/test/048-reflect-v8/src/UserComplex.java
new file mode 100644
index 0000000..e262349
--- /dev/null
+++ b/test/048-reflect-v8/src/UserComplex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Stored as a complex annotation Calendars(Calendar,Calendar)
+// followed by a Calendar in the binary.
+// In other words { Calendars([C,C]), C }
+//
+// Note that trying to do {C, Calendars, C} or similar
+// is illegal under the JLS.
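+//
+// For example, something like the following should be rejected by the compiler
+// (illustrative only, not part of this test):
+//   @Calendar(dayOfMonth="a")
+//   @Calendars({ @Calendar(dayOfMonth="b") })
+//   @Calendar(dayOfMonth="c")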
+@Calendar(dayOfMonth="afirst")
+@Calendars ({
+  @Calendar(dayOfMonth="zsecond"),
+  @Calendar(dayOfMonth="athird", hour=23)
+})
+// @Calendar(dayOfMonth="zlast")  // Leave for future ordering test
+public class UserComplex {
+
+}
diff --git a/test/048-reflect-v8/src/UserSub.java b/test/048-reflect-v8/src/UserSub.java
new file mode 100644
index 0000000..d60aa6a
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class UserSub
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
diff --git a/test/048-reflect-v8/src/UserSub2.java b/test/048-reflect-v8/src/UserSub2.java
new file mode 100644
index 0000000..13e2eb0
--- /dev/null
+++ b/test/048-reflect-v8/src/UserSub2.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This calendar subsumes anything else we would've normally inherited from the superclass.
+@Calendar(dayOfMonth="sub2")
+public class UserSub2
+  extends User
+  implements IFaceA, IFaceSimple {
+
+}
diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run
index ea2045b..aeb5721 100644
--- a/test/115-native-bridge/run
+++ b/test/115-native-bridge/run
@@ -28,4 +28,4 @@
 LEFT=$(echo ${ARGS} | sed -r 's/-Djava.library.path.*//')
 RIGHT=$(echo ${ARGS} | sed -r 's/.*Djava.library.path[^ ]* //')
 MODARGS="${LEFT} -Djava.library.path=`pwd` ${RIGHT}"
-exec ${RUN} --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain
+exec ${RUN} --runtime-option -Xforce-nb-testing --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 06cfd0a..8f9a32a 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -631,7 +631,8 @@
   /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
   /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
@@ -644,7 +645,8 @@
   /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
   //  Array reference ..[j] still in inner loop, with a direct index.
   /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
   //  Synthetic phi.
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 3c8abeb..2f80470 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1404,7 +1404,7 @@
   /// CHECK-START: int Main.floatConditionNotEqualOne(float) ssa_builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before)
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1420,7 +1420,7 @@
   /// CHECK-START: int Main.doubleConditionEqualZero(double) ssa_builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before)
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index f87326c..d647683 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -664,19 +664,50 @@
     System.out.println("testFinalizableByForcingGc() failed to force gc.");
   }
 
-  public static void assertIntEquals(int expected, int result) {
+  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  // Test that HSelect creates alias.
+  public static int testHSelect(boolean b) {
+    // Disable inlining.
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+
+    TestClass obj = new TestClass();
+    TestClass obj2 = null;
+    obj.i = 0xdead;
+    if (b) {
+      obj2 = obj;
+    }
+    return obj2.i;
+  }
+
+  public static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertFloatEquals(float expected, float result) {
+  public static void assertFloatEquals(float result, float expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertDoubleEquals(double expected, double result) {
+  public static void assertDoubleEquals(double result, double expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
@@ -723,5 +754,6 @@
     assertIntEquals(test23(false), 5);
     assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
+    assertIntEquals(testHSelect(true), 0xdead);
   }
 }
diff --git a/test/555-checker-regression-x86const/build b/test/555-checker-regression-x86const/build
new file mode 100644
index 0000000..09dcc36
--- /dev/null
+++ b/test/555-checker-regression-x86const/build
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# We can't use the src-ex testing infrastructure because src and src-ex are compiled
+# with javac independently and can't share code (without reflection).
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+mv classes/UnresolvedClass.class classes-ex
+
+if [ ${USE_JACK} = "true" ]; then
+  # Create .jack files from classes generated with javac.
+  ${JILL} classes --output classes.jack
+  ${JILL} classes-ex --output classes-ex.jack
+
+  # Create DEX files from .jack files.
+  ${JACK} --import classes.jack --output-dex .
+  zip $TEST_NAME.jar classes.dex
+  ${JACK} --import classes-ex.jack --output-dex .
+  zip ${TEST_NAME}-ex.jar classes.dex
+else
+  if [ ${NEED_DEX} = "true" ]; then
+    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+    zip $TEST_NAME.jar classes.dex
+    ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+    zip ${TEST_NAME}-ex.jar classes.dex
+  fi
+fi
diff --git a/test/555-checker-regression-x86const/expected.txt b/test/555-checker-regression-x86const/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/555-checker-regression-x86const/expected.txt
diff --git a/test/555-checker-regression-x86const/info.txt b/test/555-checker-regression-x86const/info.txt
new file mode 100644
index 0000000..c4037fa
--- /dev/null
+++ b/test/555-checker-regression-x86const/info.txt
@@ -0,0 +1,2 @@
+Check that the X86 FP constant-area pass correctly handles intrinsics that have a
+CurrentMethod on the call.
diff --git a/test/555-checker-regression-x86const/run b/test/555-checker-regression-x86const/run
new file mode 100644
index 0000000..63fdb8c
--- /dev/null
+++ b/test/555-checker-regression-x86const/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use secondary switch to add secondary dex file to class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/555-checker-regression-x86const/src/Main.java b/test/555-checker-regression-x86const/src/Main.java
new file mode 100644
index 0000000..914cfde
--- /dev/null
+++ b/test/555-checker-regression-x86const/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main extends UnresolvedClass {
+
+  /// CHECK-START: float Main.callAbs(float) register (before)
+  /// CHECK:       <<CurrentMethod:[ij]\d+>> CurrentMethod
+  /// CHECK:       <<ParamValue:f\d+>> ParameterValue
+  /// CHECK:       InvokeStaticOrDirect [<<ParamValue>>,<<CurrentMethod>>] method_name:java.lang.Math.abs
+  static public float callAbs(float f) {
+    // An intrinsic invoke in a method that has unresolved references will still
+    // have a CurrentMethod as an argument.  The X86 pc_relative_fixups_x86 pass
+    // must be able to handle Math.abs invokes that have a CurrentMethod, as both
+    // the CurrentMethod and the HX86LoadFromConstantTable (for the bitmask)
+    // expect to occupy the 'SpecialInputIndex' input position.
+    return Math.abs(f);
+  }
+
+  static public void main(String[] args) {
+    expectEquals(callAbs(-6.5f), 6.5f);
+  }
+
+  public static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/555-checker-regression-x86const/src/Unresolved.java b/test/555-checker-regression-x86const/src/Unresolved.java
new file mode 100644
index 0000000..e98bdbf
--- /dev/null
+++ b/test/555-checker-regression-x86const/src/Unresolved.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class UnresolvedClass {
+}
diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
index 8de0533..8b527b4 100644
--- a/test/562-bce-preheader/src/Main.java
+++ b/test/562-bce-preheader/src/Main.java
@@ -70,6 +70,26 @@
     return acc;
   }
 
+  /**
+   * An artificial example whose phi structure is inconsistent during
+   * dynamic BCE and is corrected afterwards. Note that only the last
+   * assignment is really live; the other statements merely set up an
+   * interesting phi structure.
+   */
+  private static int doit(int[] z) {
+    int a = 0;
+    for (int i = 0; i < 10; ++i) {
+      for (int j = i; j < 10; ++j) {
+        a = z[i];
+        for (int k = 0; k < 10; ++k) {
+          a += z[k];
+          a = z[i];
+        }
+      }
+    }
+    return a;
+  }
+
   public static void main(String args[]) {
     int[][] x = new int[2][2];
     int y;
@@ -96,6 +116,9 @@
     expectEquals(26, foo(a, b,  2));
     expectEquals(38, foo(a, b,  3));
 
+    int[] z = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    expectEquals(10, doit(z));
+
     System.out.println("passed");
   }
 
diff --git a/test/565-checker-condition-liveness/src/Main.java b/test/565-checker-condition-liveness/src/Main.java
index a811e5b..dc4cb76 100644
--- a/test/565-checker-condition-liveness/src/Main.java
+++ b/test/565-checker-condition-liveness/src/Main.java
@@ -16,6 +16,24 @@
 
 public class Main {
 
+  /// CHECK-START-X86: int Main.p(float) liveness (after)
+  /// CHECK:         <<Arg:f\d+>>  ParameterValue uses:[<<UseInput:\d+>>]
+  /// CHECK-DAG:     <<Five:f\d+>> FloatConstant 5 uses:[<<UseInput>>]
+  /// CHECK-DAG:     <<Zero:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<MinusOne:i\d+>> IntConstant -1 uses:[<<UseInput>>]
+  /// CHECK:         <<Base:i\d+>> X86ComputeBaseMethodAddress uses:[<<UseInput>>]
+  /// CHECK-NEXT:    <<Load:f\d+>> X86LoadFromConstantTable [<<Base>>,<<Five>>]
+  /// CHECK-NEXT:    <<Cond:z\d+>> LessThanOrEqual [<<Arg>>,<<Load>>]
+  /// CHECK-NEXT:                  Select [<<Zero>>,<<MinusOne>>,<<Cond>>] liveness:<<LivSel:\d+>>
+  /// CHECK-EVAL:    <<UseInput>> == <<LivSel>> + 1
+
+  public static int p(float arg) {
+    if (arg > 5.0f) {
+      return 0;
+    }
+    return -1;
+  }
+
   /// CHECK-START: void Main.main(java.lang.String[]) liveness (after)
   /// CHECK:         <<X:i\d+>>    ArrayLength uses:[<<UseInput:\d+>>]
   /// CHECK:         <<Y:i\d+>>    StaticFieldGet uses:[<<UseInput>>]
diff --git a/test/565-checker-doublenegbitwise/expected.txt b/test/565-checker-doublenegbitwise/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/expected.txt
diff --git a/test/565-checker-doublenegbitwise/info.txt b/test/565-checker-doublenegbitwise/info.txt
new file mode 100644
index 0000000..cbe183c
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/info.txt
@@ -0,0 +1 @@
+Test simplification of double-negated bitwise operations.
diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
new file mode 100644
index 0000000..41af97b
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/src/Main.java
@@ -0,0 +1,323 @@
+/*
+* Copyright (C) 2016 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test transformation of Not/Not/And into Or/Not.
+   */
+
+  // Note: before the instruction_simplifier pass, Xor's are used instead of
+  // Not's (the simplification happens during the same pass).
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<Or>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static int $opt$noinline$andToOr(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a & ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/And into Or/Not for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<Or>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanAndToOr(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        And
+
+  public static boolean $opt$noinline$booleanAndToOr(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a & !b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Or into And/Not.
+   */
+
+  // See note above.
+  // The second Xor has its arguments reversed for no obvious reason.
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (before)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:j\d+>>       LongConstant -1
+  /// CHECK:       <<Not1:j\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:j\d+>>        Xor [<<CstM1>>,<<P2>>]
+  /// CHECK:       <<Or:j\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<And:j\d+>>         And [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:j\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static long $opt$noinline$orToAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    return ~a | ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Or into And/Not for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<And:i\d+>>         And [<<Cond2>>,<<Cond1>>]
+  /// CHECK:       <<BooleanNot:z\d+>>  BooleanNot [<<And>>]
+  /// CHECK:                            Return [<<BooleanNot>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:                            BooleanNot
+  /// CHECK-NOT:                        BooleanNot
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanOrToAnd(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        Or
+
+  public static boolean $opt$noinline$booleanOrToAnd(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a | !b;
+  }
+
+  /**
+   * Test that the transformation copes with inputs being separated from the
+   * bitwise operations.
+   * This is a regression test. The initial logic was inserting the new bitwise
+   * operation incorrectly.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:   <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<AddP1>>,<<CstM1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<AddP2>>,<<CstM1>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<And:i\d+>>         And [<<AddP1>>,<<AddP2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$regressInputsAway(int a, int b) {
+    if (doThrow) throw new Error();
+    int a1 = a + 1;
+    int not_a1 = ~a1;
+    int b1 = b + 1;
+    int not_b1 = ~b1;
+    return not_a1 | not_b1;
+  }
+
+  /**
+   * Test transformation of Not/Not/Xor into Xor.
+   */
+
+  // See first note above.
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<P1>>,<<P2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Not
+
+  public static int $opt$noinline$notXorToXor(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a ^ ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Xor into Xor for boolean negations.
+   * Note that the graph before this instruction simplification pass does not
+   * contain `HBooleanNot` instructions. This is because this transformation
+   * follows the optimization of `HSelect` to `HBooleanNot` occurring in the
+   * same pass.
+   */
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (before)
+  /// CHECK:       <<P1:z\d+>>          ParameterValue
+  /// CHECK:       <<P2:z\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Const0:i\d+>>      IntConstant 0
+  /// CHECK-DAG:   <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Select1:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P1>>]
+  /// CHECK:       <<Select2:i\d+>>     Select [<<Const1>>,<<Const0>>,<<P2>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Select2>>,<<Select1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:       <<Cond1:z\d+>>       ParameterValue
+  /// CHECK:       <<Cond2:z\d+>>       ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Cond2>>,<<Cond1>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$booleanNotXorToXor(boolean, boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:                        BooleanNot
+
+  public static boolean $opt$noinline$booleanNotXorToXor(boolean a, boolean b) {
+    if (doThrow) throw new Error();
+    return !a ^ !b;
+  }
+
+  /**
+   * Check that no transformation is done when one Not has multiple uses.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$notMultipleUses(int a, int b) {
+    if (doThrow) throw new Error();
+    int tmp = ~b;
+    return (tmp & 0x1) + (~a & tmp);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(~0xff, $opt$noinline$andToOr(0xf, 0xff));
+    assertLongEquals(~0xf, $opt$noinline$orToAnd(0xf, 0xff));
+    assertIntEquals(0xf0, $opt$noinline$notXorToXor(0xf, 0xff));
+    assertIntEquals(~0xff, $opt$noinline$notMultipleUses(0xf, 0xff));
+  }
+}
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
index edb31e6..3a1b3fc 100644
--- a/test/566-checker-codegen-select/src/Main.java
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK:             LessThanOrEqual
   /// CHECK-NEXT:        Select
 
+  // Check that we generate CMOV for long on x86_64.
+  /// CHECK-START-X86_64: long Main.$noinline$longSelect_Constant(long) disassembly (after)
+  /// CHECK:             LessThanOrEqual
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmpq
+  /// CHECK:             cmovle/ngq
+
   public long $noinline$longSelect_Constant(long param) {
     if (doThrow) { throw new Error(); }
     long val_true = longB;
@@ -52,12 +59,34 @@
     return (param > 3L) ? val_true : val_false;
   }
 
+  // Check that we generate CMOV for int on x86_64.
+  /// CHECK-START-X86_64: int Main.$noinline$intSelect_Constant(int) disassembly (after)
+  /// CHECK:             LessThan
+  /// CHECK-NEXT:        Select
+  /// CHECK:             cmp
+  /// CHECK:             cmovl/nge
+
+  public int $noinline$intSelect_Constant(int param) {
+    if (doThrow) { throw new Error(); }
+    int val_true = intB;
+    int val_false = intC;
+    return (param >= 3) ? val_true : val_false;
+  }
+
   public static void main(String[] args) {
     Main m = new Main();
     assertLongEquals(5L, m.$noinline$longSelect(4L));
     assertLongEquals(7L, m.$noinline$longSelect(2L));
     assertLongEquals(5L, m.$noinline$longSelect_Constant(4L));
     assertLongEquals(7L, m.$noinline$longSelect_Constant(2L));
+    assertIntEquals(5, m.$noinline$intSelect_Constant(4));
+    assertIntEquals(7, m.$noinline$intSelect_Constant(2));
+  }
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error(expected + " != " + actual);
+    }
   }
 
   public static void assertLongEquals(long expected, long actual) {
@@ -71,4 +100,6 @@
   public long longA = 3L;
   public long longB = 5L;
   public long longC = 7L;
+  public int intB = 5;
+  public int intC = 7;
 }
diff --git a/test/566-checker-signum/src/Main.java b/test/566-checker-signum/src/Main.java
index cc4a984..0ad0042 100644
--- a/test/566-checker-signum/src/Main.java
+++ b/test/566-checker-signum/src/Main.java
@@ -54,6 +54,13 @@
     expectEquals(1, sign64(12345L));
     expectEquals(1, sign64(Long.MAX_VALUE));
 
+    expectEquals(-1, sign64(0x800000007FFFFFFFL));
+    expectEquals(-1, sign64(0x80000000FFFFFFFFL));
+    expectEquals(1, sign64(0x000000007FFFFFFFL));
+    expectEquals(1, sign64(0x00000000FFFFFFFFL));
+    expectEquals(1, sign64(0x7FFFFFFF7FFFFFFFL));
+    expectEquals(1, sign64(0x7FFFFFFFFFFFFFFFL));
+
     for (long i = -11L; i <= 11L; i++) {
       int expected = 0;
       if (i < 0) expected = -1;
@@ -61,6 +68,14 @@
       expectEquals(expected, sign64(i));
     }
 
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, sign64(i));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, sign64(i));
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
index 52abb75..951d2c7 100644
--- a/test/567-checker-compare/src/Main.java
+++ b/test/567-checker-compare/src/Main.java
@@ -88,6 +88,10 @@
     expectEquals(1, compare64(Long.MAX_VALUE, 1L));
     expectEquals(1, compare64(Long.MAX_VALUE, Long.MAX_VALUE - 1L));
 
+    expectEquals(-1, compare64(0x111111117FFFFFFFL, 0x11111111FFFFFFFFL));
+    expectEquals(0, compare64(0x111111117FFFFFFFL, 0x111111117FFFFFFFL));
+    expectEquals(1, compare64(0x11111111FFFFFFFFL, 0x111111117FFFFFFFL));
+
     for (long i = -11L; i <= 11L; i++) {
       for (long j = -11L; j <= 11L; j++) {
         int expected = 0;
@@ -97,6 +101,14 @@
       }
     }
 
+    for (long i = Long.MIN_VALUE; i <= Long.MIN_VALUE + 11L; i++) {
+      expectEquals(-1, compare64(i, 0));
+    }
+
+    for (long i = Long.MAX_VALUE; i >= Long.MAX_VALUE - 11L; i--) {
+      expectEquals(1, compare64(i, 0));
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 9a85c81..e2d451c 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -39,7 +39,8 @@
     /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
     /// CHECK-DAG:  <<Ignored:i\d+>>    IntConstant 77
     /// CHECK-DAG:  <<ClinitCk:l\d+>>   ClinitCheck
-    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>,<<ClinitCk>>]
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>{{(,[ij]\d+)?}},<<ClinitCk>>]
     /// CHECK-DAG:                      Return [<<Invoke>>]
 
     /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (after)
@@ -313,7 +314,8 @@
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (before)
     /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
-    /// CHECK-DAG:              InvokeStaticOrDirect [<<Obj>>] method_name:java.lang.Object.<init>
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:java.lang.Object.<init>
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (after)
     /// CHECK-NOT:                      InvokeStaticOrDirect
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
new file mode 100644
index 0000000..555c6a9
--- /dev/null
+++ b/test/570-checker-osr/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+100000000
+200000000
+300000000
+400000000
diff --git a/test/570-checker-osr/info.txt b/test/570-checker-osr/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-osr/info.txt
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
new file mode 100644
index 0000000..fb84687
--- /dev/null
+++ b/test/570-checker-osr/osr.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+class OsrVisitor : public StackVisitor {
+ public:
+  explicit OsrVisitor(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        in_osr_method_(false) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if ((m_name.compare("$noinline$returnInt") == 0) ||
+        (m_name.compare("$noinline$returnFloat") == 0) ||
+        (m_name.compare("$noinline$returnDouble") == 0) ||
+        (m_name.compare("$noinline$returnLong") == 0) ||
+        (m_name.compare("$noinline$deopt") == 0)) {
+      const OatQuickMethodHeader* header =
+          Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
+      if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
+        in_osr_method_ = true;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  bool in_osr_method_;
+};
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    // Just return true for non-jit configurations to stop the infinite loop.
+    return JNI_TRUE;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self());
+  visitor.WalkStack();
+  return visitor.in_osr_method_;
+}
+
+}  // namespace art
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
new file mode 100644
index 0000000..869c7c3
--- /dev/null
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LOsr;
+
+.super Ljava/lang/Object;
+
+# Check that blocks only having nops are not merged when they are loop headers.
+# This ensures we can do on-stack replacement for branches to those nop blocks.
+
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-DAG:                     SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
+## CHECK-DAG:                     SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
+.method public static simpleLoop(II)I
+   .registers 3
+   const/16 v0, 0
+   :nop_entry
+   nop
+   :loop_entry
+   add-int v0, v0, v0
+   if-eq v0, v1, :loop_entry
+   if-eq v0, v2, :nop_entry
+   return v0
+.end method
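(For readers who do not read smali: structurally, simpleLoop is two nested loops whose outer header is the nop-only block. A rough Java equivalent is sketched below; the class name is illustrative, and a Java compiler would not preserve the empty header block, which is why the test itself is written in smali:)

class OsrShape {
  // Rough structural equivalent of Osr.simpleLoop(II)I; illustrative only.
  static int simpleLoop(int a, int b) {
    int v0 = 0;
    do {                  // outer loop, headed by the nop-only block (:nop_entry)
      do {                // inner loop (:loop_entry)
        v0 += v0;         // add-int v0, v0, v0
      } while (v0 == a);  // if-eq v0, v1, :loop_entry
    } while (v0 == b);    // if-eq v0, v2, :nop_entry
    return v0;
  }
}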
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
new file mode 100644
index 0000000..907d133
--- /dev/null
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is a copy of 802-deoptimization/src/DeoptimizationController.java
+// because run-test requires each test to be standalone.
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+  private static final String TEMP_FILE_NAME_PREFIX = "test";
+  private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  public static void startDeoptimization() {
+    File tempFile = null;
+    try {
+      tempFile = createTempFile();
+      String tempFileName = tempFile.getPath();
+
+      VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+      if (VMDebug.getMethodTracingMode() == 0) {
+        throw new IllegalStateException("Not tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    } finally {
+      if (tempFile != null) {
+        tempFile.delete();
+      }
+    }
+  }
+
+  public static void stopDeoptimization() {
+    try {
+      VMDebug.stopMethodTracing();
+      if (VMDebug.getMethodTracingMode() != 0) {
+        throw new IllegalStateException("Still tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    }
+  }
+
+  private static class VMDebug {
+    private static final Method startMethodTracingMethod;
+    private static final Method stopMethodTracingMethod;
+    private static final Method getMethodTracingModeMethod;
+
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMDebug");
+        startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+            Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+        stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+        getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void startMethodTracing(String filename, int bufferSize, int flags,
+        boolean samplingEnabled, int intervalUs) throws Exception {
+      startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+          intervalUs);
+    }
+    public static void stopMethodTracing() throws Exception {
+      stopMethodTracingMethod.invoke(null);
+    }
+    public static int getMethodTracingMode() throws Exception {
+      return (int) getMethodTracingModeMethod.invoke(null);
+    }
+  }
+}
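(A hedged usage sketch for the helper above; the wrapper method and class name are illustrative and not part of this change. The general idea is that starting non-sampling method tracing through VMDebug is intended to force the runtime to deoptimize, and stopDeoptimization switches tracing back off:)

class DeoptUsageSketch {
  // Illustrative only: bracket some work with the controller.
  static void runDeoptimized(Runnable work) {
    DeoptimizationController.startDeoptimization();  // starts tracing, intended to force deoptimization
    try {
      work.run();
    } finally {
      DeoptimizationController.stopDeoptimization(); // stops tracing and verifies tracing mode is off
    }
  }
}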
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
new file mode 100644
index 0000000..7485163
--- /dev/null
+++ b/test/570-checker-osr/src/Main.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    if ($noinline$returnInt() != 53) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnFloat() != 42.2f) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnDouble() != Double.longBitsToDouble(0xF000000000001111L)) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnLong() != 0xFFFF000000001111L) {
+      throw new Error("Unexpected return value");
+    }
+
+    try {
+      $noinline$deopt();
+    } catch (Exception e) {}
+  }
+
+  public static int $noinline$returnInt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 53;
+  }
+
+  public static float $noinline$returnFloat() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 200000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 42.2f;
+  }
+
+  public static double $noinline$returnDouble() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 300000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return Double.longBitsToDouble(0xF000000000001111L);
+  }
+
+  public static long $noinline$returnLong() {
+    if (doThrow) throw new Error("");
+    int i = 1000000;
+    for (; i < 400000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 0xFFFF000000001111L;
+  }
+
+  public static void $noinline$deopt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    DeoptimizationController.startDeoptimization();
+  }
+
+  public static int[] array = new int[4];
+
+  public static native boolean ensureInOsrCode();
+
+  public static boolean doThrow = false;
+}
diff --git a/test/570-checker-select/expected.txt b/test/570-checker-select/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-select/expected.txt
diff --git a/test/570-checker-select/info.txt b/test/570-checker-select/info.txt
new file mode 100644
index 0000000..6d49532
--- /dev/null
+++ b/test/570-checker-select/info.txt
@@ -0,0 +1 @@
+Tests for HSelect codegens.
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
new file mode 100644
index 0000000..ec60240
--- /dev/null
+++ b/test/570-checker-select/src/Main.java
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntVarCst(boolean cond, int x) {
+    return cond ? x : 1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
+  public static int BoolCond_IntCstVar(boolean cond, int y) {
+    return cond ? 1 : y;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongVarVar(boolean, long, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongVarCst(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongVarCst(boolean cond, long x) {
+    return cond ? x : 1L;
+  }
+
+  /// CHECK-START: long Main.BoolCond_LongCstVar(boolean, long) register (after)
+  /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
+
+  /// CHECK-START-X86_64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long BoolCond_LongCstVar(boolean cond, long y) {
+    return cond ? 1L : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarCst(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarCst(boolean cond, float x) {
+    return cond ? x : 1.0f;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatCstVar(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatCstVar(boolean cond, float y) {
+    return cond ? 1.0f : y;
+  }
+
+  /// CHECK-START: int Main.IntNonmatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  /// CHECK-START-X86_64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.IntMatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  /// CHECK-START-X86_64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
+  public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: long Main.IntNonmatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-X86_64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.IntMatCond_LongVarVar(int, int, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-X86_64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
+  /// CHECK-START: long Main.LongNonmatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+
+  /// CHECK-START-X86_64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+
+  public static long LongNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
+
+  /// CHECK-START-X86_64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ngq
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/neq
+
+  public static long LongMatCond_LongVarVar(long a, long b, long x, long y) {
+    long result = (a > b ? x : y);
+    return result + (a > b ? 0L : 1L);
+  }
+
+  /// CHECK-START: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{f\d+}},{{f\d+}},<<Cond>>]
+
+  public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  /// CHECK-START: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual
+  /// CHECK-NEXT:       <<Sel:f\d+>>  Select [{{f\d+}},{{f\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     TypeConversion [<<Cond>>]
+
+  public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
+    float result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  public static void assertEqual(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void assertEqual(float expected, float actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEqual(5, BoolCond_IntVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_IntVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_IntVarCst(true, 5));
+    assertEqual(1, BoolCond_IntVarCst(false, 5));
+    assertEqual(1, BoolCond_IntCstVar(true, 7));
+    assertEqual(7, BoolCond_IntCstVar(false, 7));
+
+    assertEqual(5L, BoolCond_LongVarVar(true, 5L, 7L));
+    assertEqual(7L, BoolCond_LongVarVar(false, 5L, 7L));
+    assertEqual(5L, BoolCond_LongVarCst(true, 5L));
+    assertEqual(1L, BoolCond_LongVarCst(false, 5L));
+    assertEqual(1L, BoolCond_LongCstVar(true, 7L));
+    assertEqual(7L, BoolCond_LongCstVar(false, 7L));
+
+    assertEqual(5, BoolCond_FloatVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_FloatVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_FloatVarCst(true, 5));
+    assertEqual(1, BoolCond_FloatVarCst(false, 5));
+    assertEqual(1, BoolCond_FloatCstVar(true, 7));
+    assertEqual(7, BoolCond_FloatCstVar(false, 7));
+
+    assertEqual(5, IntNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, IntNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
+
+    assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatLtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(2, Float.NaN, 5, 7));
+  }
+}
diff --git a/test/571-irreducible-loop/expected.txt b/test/571-irreducible-loop/expected.txt
new file mode 100644
index 0000000..3a71184
--- /dev/null
+++ b/test/571-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+5.9E-44
diff --git a/test/571-irreducible-loop/info.txt b/test/571-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/571-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+an irreducible loop.
diff --git a/test/571-irreducible-loop/smali/IrreducibleLoop.smali b/test/571-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..737a18b
--- /dev/null
+++ b/test/571-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,47 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Check that on x86 we don't crash because irreducible loops
+# disabled the constant pool optimization.
+.method public static test1(IF)F
+   .registers 5
+   const/16 v0, 1
+   const/16 v1, 42
+
+   if-nez p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   add-float v2, p1, v1
+   sub-float v2, v2, v1
+   div-float v2, v2, v1
+   mul-float v2, v2, v1
+   :other_loop_entry
+   sub-int p0, p0, v0
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   # In that block, v4 has no live range.
+   :other_loop_pre_entry
+   goto :other_loop_entry
+
+   :exit
+   return v1
+.end method
diff --git a/test/571-irreducible-loop/src/Main.java b/test/571-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..ff22f67
--- /dev/null
+++ b/test/571-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test1", int.class, float.class);
+    Object[] arguments = { 42, 31.0f };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
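(Why expected.txt above contains 5.9E-44: test1 returns v1, which holds the integer constant 42, through a float-typed return, so the caller sees the raw bits 42 reinterpreted as a float. A one-line Java check of that value; the class name is illustrative:)

class ExpectedValueCheck {
  public static void main(String[] args) {
    // 42 as raw IEEE-754 bits is the subnormal 42 * 2^-149, which Java prints as 5.9E-44.
    System.out.println(Float.intBitsToFloat(42));
  }
}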
diff --git a/test/971-iface-super/util-src/generate_smali.py b/test/971-iface-super/util-src/generate_smali.py
index f01c904..3681411 100755
--- a/test/971-iface-super/util-src/generate_smali.py
+++ b/test/971-iface-super/util-src/generate_smali.py
@@ -39,7 +39,7 @@
 import string
 
 # The max depth the type tree can have.
-MAX_IFACE_DEPTH = 3
+MAX_IFACE_DEPTH = 2
 
 class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
   """
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index faaf1f0..e547c72 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -40,7 +40,8 @@
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
   543-env-long-ref/env_long_ref.cc \
-  566-polymorphic-inlining/polymorphic_inline.cc
+  566-polymorphic-inlining/polymorphic_inline.cc \
+  570-checker-osr/osr.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 7400248..a8938fa 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -302,18 +302,7 @@
 
 # Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
 TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
-  004-JniTest \
-  005-annotations \
-  044-proxy \
-  073-mismatched-field \
-  088-monitor-verification \
-  135-MirandaDispatch \
-  137-cfi \
-  412-new-array \
-  471-uninitialized-locals \
-  506-verify-aput \
-  554-jit-profile-file \
-  800-smali
+  137-cfi
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -470,10 +459,7 @@
 
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
-    441-checker-inliner \
     510-checker-try-catch \
-    536-checker-intrinsic-optimization \
-    557-checker-instruction-simplifier-ror \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -488,7 +474,6 @@
 
 # Known broken tests for the mips64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
-    557-checker-instruction-simplifier-ror \
 
 ifeq (mips64,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -615,18 +600,6 @@
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 
-# Tests broken by multi-image.
-TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS := \
-  476-checker-ctor-memory-barrier \
-  530-checker-lse
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), \
-    $(TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS),  $(ALL_ADDRESS_SIZES))
-
-TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS :=
-
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 6d67f84..44206df 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -171,7 +171,7 @@
   bug: 25437292
 },
 {
-  description: "Failing tests after enso move.",
+  description: "Failing tests after OpenJDK move.",
   result: EXEC_FAILED,
   bug: 26326992,
   names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST",
@@ -196,6 +196,12 @@
           "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String",
           "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible",
           "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_W_w_dd_MMMM_yyyy_EEEE",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_dayOfYearPatterns",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_m_z",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_2DigitOffsetFromGMT",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetFromGMT",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetNoGMT",
           "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0",
           "libcore.java.util.CalendarTest#test_clear_45877",
           "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat",
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 95d1292..d8ef9ba 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -27,7 +27,8 @@
   description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
   result: EXEC_FAILED,
   modes: [host],
-  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushEnabled",
+          "libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
           "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
           "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
@@ -40,7 +41,8 @@
   result: EXEC_FAILED,
   modes: [device],
   names: ["libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
-          "libcore.java.text.SimpleDateFormatTest#testLocales"],
+          "libcore.java.text.SimpleDateFormatTest#testLocales",
+          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries"],
   bug: 26711853
 }
 ]