Merge "runtime: Cleanup comments for kAccSkipAccessChecks"

commit: bd707ab60fa33ec1ba921ee0c7d3b24d70f00a54 [log] [tgz]
author: Igor Murashkin <iam@google.com> Thu Feb 04 23:30:13 2016 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Thu Feb 04 23:30:13 2016 +0000
tree: 445bb30d2a2dfeede4653d10bbec8017e203cb5b
parent: a4e981265dd57adbe755e12a814c0f30ae073c2f [diff]
parent: 8d1da85822f254f102dfec2903ca7aa0064444af [diff]
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 372fe2b..4d6c058 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc

@@ -28,6 +28,8 @@
 
 namespace art {
 
+static constexpr size_t kMips64DoublewordSize = 8;
+
 /* This file contains codegen for the Mips ISA */
 LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
@@ -760,7 +762,25 @@
 
   if (cu_->target64) {
     if (short_form) {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Ld) {
+        RegStorage r_tmp = AllocTemp();
+        load = res = NewLIR3(kMips64Lwu, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMips64Lwu, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsll32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        NewLIR3(kMipsOr, r_dest.GetReg(), r_dest.GetReg(), r_tmp.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFldc1) {
+        RegStorage r_tmp = AllocTemp();
+        r_dest = Fp64ToSolo32(r_dest);
+        load = res = NewLIR3(kMipsFlwc1, r_dest.GetReg(), displacement + LOWORD_OFFSET,
+                             r_base.GetReg());
+        load2 = NewLIR3(kMipsLw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        NewLIR2(kMipsMthc1, r_tmp.GetReg(), r_dest.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
@@ -771,7 +791,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+      AnnotateDalvikRegAccess(load, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, r_dest.Is64Bit() /* is64bit */);
+      if (load2 != nullptr) {
+        AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2,
+                                true /* is_load */, r_dest.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }
@@ -932,7 +957,24 @@
 
   if (cu_->target64) {
     if (short_form) {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMips64Sd) {
+        RegStorage r_tmp = AllocTemp();
+        res = NewLIR2(kMipsMove, r_tmp.GetReg(), r_src.GetReg());
+        store = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+        NewLIR3(kMips64Dsrl32, r_tmp.GetReg(), r_tmp.GetReg(), 0x0);
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else if (!IsAligned<kMips64DoublewordSize>(displacement) && opcode == kMipsFsdc1) {
+        RegStorage r_tmp = AllocTemp();
+        r_src = Fp64ToSolo32(r_src);
+        store = res = NewLIR3(kMipsFswc1, r_src.GetReg(), displacement + LOWORD_OFFSET,
+                              r_base.GetReg());
+        NewLIR2(kMipsMfhc1, r_tmp.GetReg(), r_src.GetReg());
+        store2 = NewLIR3(kMipsSw, r_tmp.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+        FreeTemp(r_tmp);
+      } else {
+        store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
+      }
     } else {
       RegStorage r_scratch = AllocTemp();
       res = OpRegRegImm(kOpAdd, r_scratch, r_base, displacement);
@@ -942,7 +984,12 @@
 
     if (mem_ref_type_ == ResourceMask::kDalvikReg) {
       DCHECK_EQ(r_base, TargetPtrReg(kSp));
-      AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+      AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      if (store2 != nullptr) {
+        AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, r_src.Is64Bit() /* is64bit */);
+      }
     }
     return res;
   }

diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9ab7280..f078bf6 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc

@@ -921,7 +921,7 @@
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
-  virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  virtual bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
@@ -975,7 +975,7 @@
   explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
       : image_classes_(image_classes) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string temp;
     image_classes_->insert(klass->GetDescriptor(&temp));
     return true;
@@ -1142,7 +1142,7 @@
    public:
     explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
 
-    bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
       std::string temp;
       const char* name = klass->GetDescriptor(&temp);
       if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {

diff --git a/compiler/dwarf/debug_abbrev_writer.h b/compiler/dwarf/debug_abbrev_writer.h
new file mode 100644
index 0000000..71367e8
--- /dev/null
+++ b/compiler/dwarf/debug_abbrev_writer.h

@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_
+
+#include <cstdint>
+#include <type_traits>
+#include <unordered_map>
+
+#include "base/casts.h"
+#include "base/stl_util.h"
+#include "dwarf/dwarf_constants.h"
+#include "dwarf/writer.h"
+#include "leb128.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for the .debug_abbrev section.
+//
+// Abbreviations specify the format of entries in .debug_info.
+// Each entry specifies an abbreviation code, which in turn
+// determines all the attributes and their format.
+// It is possible to think of them as type definitions.
+template <typename Vector = std::vector<uint8_t>>
+class DebugAbbrevWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+ public:
+  explicit DebugAbbrevWriter(Vector* buffer)
+      : Writer<Vector>(buffer),
+        current_abbrev_(buffer->get_allocator()) {
+    this->PushUint8(0);  // Add abbrev table terminator.
+  }
+
+  // Start abbreviation declaration: encode the tag and reserve a byte for DW_CHILDREN.
+  void StartAbbrev(Tag tag) {
+    DCHECK(current_abbrev_.empty());  // No declaration may be in progress.
+    EncodeUnsignedLeb128(&current_abbrev_, tag);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
+  }
+
+  // Add attribute specification (ULEB128 name followed by ULEB128 form).
+  void AddAbbrevAttribute(Attribute name, Form type) {
+    EncodeUnsignedLeb128(&current_abbrev_, name);
+    EncodeUnsignedLeb128(&current_abbrev_, type);
+  }
+
+  // End abbreviation declaration and return its code.
+  // Identical declarations are deduplicated and share one code.
+  uint32_t EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;  // Fill in the place-holder.
+    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), NextAbbrevCode()));
+    uint32_t abbrev_code = it.first->second;
+    if (UNLIKELY(it.second)) {  // Inserted new entry.
+      const Vector& abbrev = it.first->first;
+      this->Pop();  // Remove abbrev table terminator.
+      this->PushUleb128(abbrev_code);
+      this->PushData(abbrev.data(), abbrev.size());
+      this->PushUint8(0);  // Attribute list end (first of two zero bytes).
+      this->PushUint8(0);  // Attribute list end (second zero byte).
+      this->PushUint8(0);  // Add abbrev table terminator.
+    }
+    current_abbrev_.clear();  // Reset the moved-from vector for the next StartAbbrev.
+    return abbrev_code;
+  }
+
+  // Get the next free abbrev code (codes start at 1).
+  uint32_t NextAbbrevCode() {
+    return dchecked_integral_cast<uint32_t>(1 + abbrev_codes_.size());
+  }
+
+ private:
+  Vector current_abbrev_;  // Declaration being built between StartAbbrev and EndAbbrev.
+  size_t has_children_offset_ = 0;  // Index of the DW_CHILDREN byte in current_abbrev_.
+  std::unordered_map<Vector, uint32_t, FNVHash<Vector> > abbrev_codes_;  // Declaration -> code.
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEBUG_ABBREV_WRITER_H_

diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index e5bbed3..1e29859 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h

@@ -21,6 +21,7 @@
 #include <unordered_map>
 
 #include "base/casts.h"
+#include "dwarf/debug_abbrev_writer.h"
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/expression.h"
 #include "dwarf/writer.h"
@@ -29,24 +30,8 @@
 namespace art {
 namespace dwarf {
 
-// 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
-// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-template <typename Vector>
-struct FNVHash {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-  size_t operator()(const Vector& v) const {
-    uint32_t hash = 2166136261u;
-    for (size_t i = 0; i < v.size(); i++) {
-      hash = (hash ^ v[i]) * 16777619u;
-    }
-    return hash;
-  }
-};
-
 /*
  * Writer for debug information entries (DIE).
- * It also handles generation of abbreviations.
  *
  * Usage:
  *   StartTag(DW_TAG_compile_unit);
@@ -69,13 +54,13 @@
     if (inside_entry_) {
       // Write abbrev code for the previous entry.
       // Parent entry is finalized before any children are written.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_yes));
       inside_entry_ = false;
     }
-    StartAbbrev(tag);
+    debug_abbrev_->StartAbbrev(tag);
     // Abbrev code placeholder of sufficient size.
     abbrev_code_offset_ = this->data()->size();
-    this->PushUleb128(NextAbbrevCode());
+    this->PushUleb128(debug_abbrev_->NextAbbrevCode());
     depth_++;
     inside_entry_ = true;
     return abbrev_code_offset_ + kCompilationUnitHeaderSize;
@@ -86,7 +71,7 @@
     DCHECK_GT(depth_, 0);
     if (inside_entry_) {
       // Write abbrev code for this entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
+      this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_no));
       inside_entry_ = false;
       // This entry has no children and so there is no terminator.
     } else {
@@ -98,7 +83,7 @@
   }
 
   void WriteAddr(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_addr);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_addr);
     patch_locations_.push_back(this->data()->size());
     if (is64bit_) {
       this->PushUint64(value);
@@ -108,85 +93,89 @@
   }
 
   void WriteBlock(Attribute attrib, const uint8_t* ptr, size_t num_bytes) {
-    AddAbbrevAttribute(attrib, DW_FORM_block);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_block);
     this->PushUleb128(num_bytes);
     this->PushData(ptr, num_bytes);
   }
 
   void WriteExprLoc(Attribute attrib, const Expression& expr) {
-    AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_exprloc);
     this->PushUleb128(dchecked_integral_cast<uint32_t>(expr.size()));
     this->PushData(expr.data());
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data1);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data1);
     this->PushUint8(value);
   }
 
   void WriteData2(Attribute attrib, uint16_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data2);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data2);
     this->PushUint16(value);
   }
 
   void WriteData4(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data4);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data4);
     this->PushUint32(value);
   }
 
   void WriteData8(Attribute attrib, uint64_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_data8);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data8);
     this->PushUint64(value);
   }
 
   void WriteSecOffset(Attribute attrib, uint32_t offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
     this->PushUint32(offset);
   }
 
   void WriteSdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_sdata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sdata);
     this->PushSleb128(value);
   }
 
   void WriteUdata(Attribute attrib, int value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
     this->PushUleb128(value);
   }
 
   void WriteUdata(Attribute attrib, uint32_t value) {
-    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata);
     this->PushUleb128(value);
   }
 
   void WriteFlag(Attribute attrib, bool value) {
-    AddAbbrevAttribute(attrib, DW_FORM_flag);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag);
     this->PushUint8(value ? 1 : 0);
   }
 
+  void WriteFlagPresent(Attribute attrib) {  // Records the form only; pushes no value bytes.
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag_present);
+  }
+
   void WriteRef4(Attribute attrib, uint32_t cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref4);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref4);
     this->PushUint32(cu_offset);
   }
 
   void WriteRef(Attribute attrib, uint32_t cu_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
     this->PushUleb128(cu_offset);
   }
 
   void WriteString(Attribute attrib, const char* value) {
-    AddAbbrevAttribute(attrib, DW_FORM_string);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_string);
     this->PushString(value);
   }
 
   void WriteStrp(Attribute attrib, size_t debug_str_offset) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
     this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
   }
 
   void WriteStrp(Attribute attrib, const char* str, size_t len,
                  std::vector<uint8_t>* debug_str) {
-    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp);
     this->PushUint32(debug_str->size());
     debug_str->insert(debug_str->end(), str, str + len);
     debug_str->push_back(0);
@@ -209,16 +198,13 @@
   using Writer<Vector>::UpdateUint32;
 
   DebugInfoEntryWriter(bool is64bitArch,
-                       Vector* debug_abbrev,
+                       DebugAbbrevWriter<Vector>* debug_abbrev,
                        const typename Vector::allocator_type& alloc =
                            typename Vector::allocator_type())
       : Writer<Vector>(&entries_),
         debug_abbrev_(debug_abbrev),
-        current_abbrev_(alloc),
-        abbrev_codes_(alloc),
         entries_(alloc),
         is64bit_(is64bitArch) {
-    debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
   }
 
   ~DebugInfoEntryWriter() {
@@ -227,53 +213,7 @@
   }
 
  private:
-  // Start abbreviation declaration.
-  void StartAbbrev(Tag tag) {
-    current_abbrev_.clear();
-    EncodeUnsignedLeb128(&current_abbrev_, tag);
-    has_children_offset_ = current_abbrev_.size();
-    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
-  }
-
-  // Add attribute specification.
-  void AddAbbrevAttribute(Attribute name, Form type) {
-    DCHECK(inside_entry_) << "Call StartTag before adding attributes.";
-    EncodeUnsignedLeb128(&current_abbrev_, name);
-    EncodeUnsignedLeb128(&current_abbrev_, type);
-  }
-
-  int NextAbbrevCode() {
-    return 1 + abbrev_codes_.size();
-  }
-
-  // End abbreviation declaration and return its code.
-  int EndAbbrev(Children has_children) {
-    DCHECK(!current_abbrev_.empty());
-    current_abbrev_[has_children_offset_] = has_children;
-    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
-                                                  NextAbbrevCode()));
-    int abbrev_code = it.first->second;
-    if (UNLIKELY(it.second)) {  // Inserted new entry.
-      const Vector& abbrev = it.first->first;
-      debug_abbrev_.Pop();  // Remove abbrev table terminator.
-      debug_abbrev_.PushUleb128(abbrev_code);
-      debug_abbrev_.PushData(abbrev.data(), abbrev.size());
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Attribute list end.
-      debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
-    }
-    return abbrev_code;
-  }
-
- private:
-  // Fields for writing and deduplication of abbrevs.
-  Writer<Vector> debug_abbrev_;
-  Vector current_abbrev_;
-  size_t has_children_offset_ = 0;
-  std::unordered_map<Vector, int,
-                     FNVHash<Vector> > abbrev_codes_;
-
-  // Fields for writing of debugging information entries.
+  DebugAbbrevWriter<Vector>* debug_abbrev_;
   Vector entries_;
   bool is64bit_;
   int depth_ = 0;

diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
deleted file mode 100644
index 7fb21b7..0000000
--- a/compiler/dwarf/dedup_vector.h
+++ /dev/null

@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
-#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
-
-#include <vector>
-#include <unordered_map>
-
-namespace art {
-namespace dwarf {
-  class DedupVector {
-   public:
-    // Returns an offset to previously inserted identical block of data,
-    // or appends the data at the end of the vector and returns offset to it.
-    size_t Insert(const uint8_t* ptr, size_t num_bytes) {
-      // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-      uint32_t hash = 2166136261u;
-      for (size_t i = 0; i < num_bytes; i++) {
-        hash = (hash ^ ptr[i]) * 16777619u;
-      }
-      // Try to find existing copy of the data.
-      const auto& range = hash_to_offset_.equal_range(hash);
-      for (auto it = range.first; it != range.second; ++it) {
-        const size_t offset = it->second;
-        if (offset + num_bytes <= vector_.size() &&
-            memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
-          return offset;
-        }
-      }
-      // Append the data at the end of the vector.
-      const size_t new_offset = vector_.size();
-      hash_to_offset_.emplace(hash, new_offset);
-      vector_.insert(vector_.end(), ptr, ptr + num_bytes);
-      return new_offset;
-    }
-
-    const std::vector<uint8_t>& Data() const { return vector_; }
-
-   private:
-    struct IdentityHash {
-      size_t operator()(uint32_t v) const { return v; }
-    };
-
-    // We store the full hash as the key to simplify growing of the table.
-    // It avoids storing or referencing the actual data in the hash-table.
-    std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
-
-    std::vector<uint8_t> vector_;
-  };
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEDUP_VECTOR_H_

diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index e9cd421..3237311 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc

@@ -283,7 +283,8 @@
 
 TEST_F(DwarfTest, DebugInfo) {
   constexpr bool is64bit = false;
-  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
+  DebugAbbrevWriter<> debug_abbrev(&debug_abbrev_data_);
+  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev);
   DW_CHECK("Contents of the .debug_info section:");
   info.StartTag(dwarf::DW_TAG_compile_unit);
   DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");

diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index 35b3e15..aa3070a 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h

@@ -42,6 +42,8 @@
   static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
   static Reg MipsCore(int num) { return Reg(num); }
   static Reg Mips64Core(int num) { return Reg(num); }
+  static Reg MipsFp(int num) { return Reg(32 + num); }  // MipsCore numbering offset by 32.
+  static Reg Mips64Fp(int num) { return Reg(32 + num); }  // Mips64Core numbering offset by 32.
   static Reg X86Core(int num) { return Reg(num); }
   static Reg X86Fp(int num) { return Reg(21 + num); }
   static Reg X86_64Core(int num) {

diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 3d24d19..bc7c83e 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h

@@ -110,18 +110,27 @@
       CHECK(sections.empty() || sections.back()->finished_);
       // The first ELF section index is 1. Index 0 is reserved for NULL.
       section_index_ = sections.size() + 1;
-      // Push this section on the list of written sections.
-      sections.push_back(this);
+      // Page-align if we switch between allocated and non-allocated sections,
+      // or if we change the type of allocation (e.g. executable vs non-executable).
+      if (!sections.empty()) {
+        if (header_.sh_flags != sections.back()->header_.sh_flags) {
+          header_.sh_addralign = kPageSize;
+        }
+      }
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = owner_->AlignFileOffset(header_.sh_addralign);
+      } else {
+        header_.sh_offset = 0;
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
-        header_.sh_addr = RoundUp(owner_->virtual_address_, header_.sh_addralign);
-        owner_->virtual_address_ = header_.sh_addr;
+        header_.sh_addr = owner_->AlignVirtualAddress(header_.sh_addralign);
+      } else {
+        header_.sh_addr = 0;
       }
+      // Push this section on the list of written sections.
+      sections.push_back(this);
     }
 
     // Finish writing of this section.
@@ -170,8 +179,8 @@
     // and it will be zero-initialized when the ELF file is loaded in the running program.
     void WriteNoBitsSection(Elf_Word size) {
       DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
-      Start();
       header_.sh_type = SHT_NOBITS;
+      Start();
       header_.sh_size = size;
       End();
     }
@@ -293,12 +302,13 @@
         dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
         eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
         eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-        strtab_(this, ".strtab", 0, kPageSize),
+        strtab_(this, ".strtab", 0, 1),
         symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
         debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
         debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         shstrtab_(this, ".shstrtab", 0, 1),
+        started_(false),
         virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
@@ -357,16 +367,25 @@
     virtual_address_ = address;
   }
 
-  void Start() {
-    // Reserve space for ELF header and program headers.
-    // We do not know the number of headers until later, so
-    // it is easiest to just reserve a fixed amount of space.
-    int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+  // Reserve space for ELF header and program headers.
+  // We do not know the number of headers until later, so
+  // it is easiest to just reserve a fixed amount of space.
+  // Program headers are required for loading by the linker.
+  // It is possible to omit them for ELF files used for debugging.
+  void Start(bool write_program_headers = true) {
+    int size = sizeof(Elf_Ehdr);
+    if (write_program_headers) {
+      size += sizeof(Elf_Phdr) * kMaxProgramHeaders;
+    }
     stream_.Seek(size, kSeekSet);
+    started_ = true;
     virtual_address_ += size;
+    write_program_headers_ = write_program_headers;
   }
 
   void End() {
+    DCHECK(started_);
+
     // Write section names and finish the section headers.
     shstrtab_.Start();
     shstrtab_.Write("");
@@ -386,8 +405,7 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    stream_.Seek(section_headers_offset, kSeekSet);
+    section_headers_offset = AlignFileOffset(sizeof(Elf_Off));
     stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
 
     // Flush everything else before writing the program headers. This should prevent
@@ -395,14 +413,21 @@
     // and partially written data if we suddenly lose power, for example.
     stream_.Flush();
 
-    // Write the initial file headers.
-    std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
+    // The main ELF header.
     Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = sizeof(Elf_Ehdr);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
+
+    // Program headers (i.e. mmap instructions).
+    std::vector<Elf_Phdr> phdrs;
+    if (write_program_headers_) {
+      phdrs = MakeProgramHeaders();
+      CHECK_LE(phdrs.size(), kMaxProgramHeaders);
+      elf_header.e_phoff = sizeof(Elf_Ehdr);
+      elf_header.e_phnum = phdrs.size();
+    }
+
     stream_.Seek(0, kSeekSet);
     stream_.WriteFully(&elf_header, sizeof(elf_header));
     stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
@@ -492,6 +517,14 @@
     return &stream_;
   }
 
+  off_t AlignFileOffset(size_t alignment) {  // Seek to the position rounded up to alignment.
+     return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet);
+  }
+
+  Elf_Addr AlignVirtualAddress(size_t alignment) {  // Round virtual_address_ up and return it.
+     return virtual_address_ = RoundUp(virtual_address_, alignment);
+  }
+
  private:
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
@@ -666,9 +699,13 @@
   // List of used section in the order in which they were written.
   std::vector<Section*> sections_;
 
+  bool started_;
+
   // Used for allocation of virtual address space.
   Elf_Addr virtual_address_;
 
+  size_t write_program_headers_;
+
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
 

diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2e98b69..ca8cd68 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc

@@ -27,7 +27,6 @@
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
-#include "dwarf/dedup_vector.h"
 #include "dwarf/expression.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
@@ -90,6 +89,10 @@
       return Reg::X86Fp(machine_reg);
     case kX86_64:
       return Reg::X86_64Fp(machine_reg);
+    case kMips:
+      return Reg::MipsFp(machine_reg);
+    case kMips64:
+      return Reg::Mips64Fp(machine_reg);
     default:
       LOG(FATAL) << "Unknown instruction set: " << isa;
       UNREACHABLE();
@@ -162,6 +165,14 @@
           opcodes.SameValue(Reg::MipsCore(reg));
         }
       }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 24) {
+          opcodes.Undefined(Reg::Mips64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::Mips64Fp(reg));
+        }
+      }
       auto return_reg = Reg::MipsCore(31);  // R31(RA).
       WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
@@ -474,7 +485,7 @@
    public:
     explicit CompilationUnitWriter(DebugInfoWriter* owner)
       : owner_(owner),
-        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) {
+        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &owner->debug_abbrev_) {
     }
 
     void Write(const CompilationUnit& compilation_unit) {
@@ -485,9 +496,9 @@
       const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
       info_.StartTag(DW_TAG_compile_unit);
-      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteString(DW_AT_producer, "Android dex2oat");
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
-      info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
+      info_.WriteString(DW_AT_comp_dir, "$JAVA_SRC_ROOT");
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
       info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
       info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
@@ -505,7 +516,7 @@
         // Enclose the method in correct class definition.
         if (last_dex_class_desc != dex_class_desc) {
           if (last_dex_class_desc != nullptr) {
-            EndClassTag(last_dex_class_desc);
+            EndClassTag();
           }
           // Write reference tag for the class we are about to declare.
           size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
@@ -516,7 +527,7 @@
           // Declare the class that owns this method.
           size_t class_offset = StartClassTag(dex_class_desc);
           info_.UpdateUint32(type_attrib_offset, class_offset);
-          info_.WriteFlag(DW_AT_declaration, true);
+          info_.WriteFlagPresent(DW_AT_declaration);
           // Check that each class is defined only once.
           bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
           CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -542,7 +553,7 @@
         if (!is_static) {
           info_.StartTag(DW_TAG_formal_parameter);
           WriteName("this");
-          info_.WriteFlag(DW_AT_artificial, true);
+          info_.WriteFlagPresent(DW_AT_artificial);
           WriteLazyType(dex_class_desc);
           if (dex_code != nullptr) {
             // Write the stack location of the parameter.
@@ -601,25 +612,32 @@
         CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
       }
       if (last_dex_class_desc != nullptr) {
-        EndClassTag(last_dex_class_desc);
+        EndClassTag();
       }
-      CHECK_EQ(info_.Depth(), 1);
       FinishLazyTypes();
+      CloseNamespacesAboveDepth(0);
       info_.EndTag();  // DW_TAG_compile_unit
+      CHECK_EQ(info_.Depth(), 0);
       std::vector<uint8_t> buffer;
       buffer.reserve(info_.data()->size() + KB);
       const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
-      const size_t debug_abbrev_offset =
-          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      // All compilation units share single table which is at the start of .debug_abbrev.
+      const size_t debug_abbrev_offset = 0;
       WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
     void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
       info_.StartTag(DW_TAG_compile_unit);
-      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteString(DW_AT_producer, "Android dex2oat");
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
 
+      // Base class references to be patched at the end.
+      std::map<size_t, mirror::Class*> base_class_references;
+
+      // Already written declarations or definitions.
+      std::map<mirror::Class*, size_t> class_declarations;
+
       std::vector<uint8_t> expr_buffer;
       for (mirror::Class* type : types) {
         if (type->IsPrimitive()) {
@@ -633,6 +651,7 @@
           uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
           uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
+          CloseNamespacesAboveDepth(0);  // Declare in root namespace.
           info_.StartTag(DW_TAG_array_type);
           std::string descriptor_string;
           WriteLazyType(element_type->GetDescriptor(&descriptor_string));
@@ -650,22 +669,10 @@
           // Skip.  Variables cannot have an interface as a dynamic type.
           // We do not expose the interface information to the debugger in any way.
         } else {
-          // Declare base class.  We can not use the standard WriteLazyType
-          // since we want to avoid the DW_TAG_reference_tag wrapping.
-          mirror::Class* base_class = type->GetSuperClass();
-          size_t base_class_declaration_offset = 0;
-          if (base_class != nullptr) {
-            std::string tmp_storage;
-            const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
-            base_class_declaration_offset = StartClassTag(base_class_desc);
-            info_.WriteFlag(DW_AT_declaration, true);
-            WriteLinkageName(base_class);
-            EndClassTag(base_class_desc);
-          }
-
           std::string descriptor_string;
           const char* desc = type->GetDescriptor(&descriptor_string);
-          StartClassTag(desc);
+          size_t class_offset = StartClassTag(desc);
+          class_declarations.emplace(type, class_offset);
 
           if (!type->IsVariableSize()) {
             info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
@@ -680,7 +687,7 @@
             info_.StartTag(DW_TAG_member);
             WriteName(".dynamic_type");
             WriteLazyType(sizeof(uintptr_t) == 8 ? "J" : "I");
-            info_.WriteFlag(DW_AT_artificial, true);
+            info_.WriteFlagPresent(DW_AT_artificial);
             // Create DWARF expression to get the value of the methods_ field.
             Expression expr(&expr_buffer);
             // The address of the object has been implicitly pushed on the stack.
@@ -702,9 +709,11 @@
           }
 
           // Base class.
+          mirror::Class* base_class = type->GetSuperClass();
           if (base_class != nullptr) {
             info_.StartTag(DW_TAG_inheritance);
-            info_.WriteRef4(DW_AT_type, base_class_declaration_offset);
+            base_class_references.emplace(info_.size(), base_class);
+            info_.WriteRef4(DW_AT_type, 0);
             info_.WriteUdata(DW_AT_data_member_location, 0);
             info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
             info_.EndTag();  // DW_TAG_inheritance.
@@ -743,18 +752,40 @@
             info_.EndTag();  // DW_TAG_member.
           }
 
-          EndClassTag(desc);
+          EndClassTag();
         }
       }
 
-      CHECK_EQ(info_.Depth(), 1);
+      // Write base class declarations.
+      for (const auto& base_class_reference : base_class_references) {
+        size_t reference_offset = base_class_reference.first;
+        mirror::Class* base_class = base_class_reference.second;
+        const auto& it = class_declarations.find(base_class);
+        if (it != class_declarations.end()) {
+          info_.UpdateUint32(reference_offset, it->second);
+        } else {
+          // Declare base class.  We can not use the standard WriteLazyType
+          // since we want to avoid the DW_TAG_reference_tag wrapping.
+          std::string tmp_storage;
+          const char* base_class_desc = base_class->GetDescriptor(&tmp_storage);
+          size_t base_class_declaration_offset = StartClassTag(base_class_desc);
+          info_.WriteFlagPresent(DW_AT_declaration);
+          WriteLinkageName(base_class);
+          EndClassTag();
+          class_declarations.emplace(base_class, base_class_declaration_offset);
+          info_.UpdateUint32(reference_offset, base_class_declaration_offset);
+        }
+      }
+
       FinishLazyTypes();
+      CloseNamespacesAboveDepth(0);
       info_.EndTag();  // DW_TAG_compile_unit.
+      CHECK_EQ(info_.Depth(), 0);
       std::vector<uint8_t> buffer;
       buffer.reserve(info_.data()->size() + KB);
       const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
-      const size_t debug_abbrev_offset =
-          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      // All compilation units share single table which is at the start of .debug_abbrev.
+      const size_t debug_abbrev_offset = 0;
       WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
@@ -840,10 +871,6 @@
               expr.WriteOpReg(Reg::ArmDp(value / 2).num());
               break;
             }
-            if (isa == kMips || isa == kMips64) {
-              // TODO: Find what the DWARF floating point register numbers are on MIPS.
-              break;
-            }
             expr.WriteOpReg(GetDwarfFpReg(isa, value).num());
             if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
                 reg_hi.GetValue() == reg_lo.GetValue()) {
@@ -940,7 +967,7 @@
    private:
     void WriteName(const char* name) {
       if (name != nullptr) {
-        info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+        info_.WriteString(DW_AT_name, name);
       }
     }
 
@@ -957,8 +984,8 @@
       if (desc[0] == 'L') {
         // Class type. For example: Lpackage/name;
         size_t class_offset = StartClassTag(desc.c_str());
-        info_.WriteFlag(DW_AT_declaration, true);
-        EndClassTag(desc.c_str());
+        info_.WriteFlagPresent(DW_AT_declaration);
+        EndClassTag();
         // Reference to the class type.
         offset = info_.StartTag(DW_TAG_reference_type);
         info_.WriteRef(DW_AT_type, class_offset);
@@ -966,8 +993,9 @@
       } else if (desc[0] == '[') {
         // Array type.
         size_t element_type = WriteTypeDeclaration(desc.substr(1));
+        CloseNamespacesAboveDepth(0);  // Declare in root namespace.
         size_t array_type = info_.StartTag(DW_TAG_array_type);
-        info_.WriteFlag(DW_AT_declaration, true);
+        info_.WriteFlagPresent(DW_AT_declaration);
         info_.WriteRef(DW_AT_type, element_type);
         info_.EndTag();
         offset = info_.StartTag(DW_TAG_reference_type);
@@ -1028,6 +1056,7 @@
           LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
         }
+        CloseNamespacesAboveDepth(0);  // Declare in root namespace.
         offset = info_.StartTag(DW_TAG_base_type);
         WriteName(name);
         info_.WriteData1(DW_AT_encoding, encoding);
@@ -1042,36 +1071,52 @@
     // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags.
     // Returns offset of the class tag in the compilation unit.
     size_t StartClassTag(const char* desc) {
-      DCHECK(desc != nullptr && desc[0] == 'L');
-      // Enclose the type in namespace tags.
-      const char* end;
-      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
-        info_.StartTag(DW_TAG_namespace);
-        WriteName(std::string(desc, end - desc).c_str());
-      }
-      // Start the class tag.
+      std::string name = SetNamespaceForClass(desc);
       size_t offset = info_.StartTag(DW_TAG_class_type);
-      end = strchr(desc, ';');
-      CHECK(end != nullptr);
-      WriteName(std::string(desc, end - desc).c_str());
+      WriteName(name.c_str());
       return offset;
     }
 
-    void EndClassTag(const char* desc) {
-      DCHECK(desc != nullptr && desc[0] == 'L');
-      // End the class tag.
+    void EndClassTag() {
       info_.EndTag();
-      // Close namespace tags.
-      const char* end;
-      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+    }
+
+    // Set the current namespace nesting to one required by the given class.
+    // Returns the class name with namespaces, 'L', and ';' stripped.
+    std::string SetNamespaceForClass(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      desc++;  // Skip the initial 'L'.
+      size_t depth = 0;
+      for (const char* end; (end = strchr(desc, '/')) != nullptr; desc = end + 1, ++depth) {
+        // Check whether the name at this depth is already what we need.
+        if (depth < current_namespace_.size()) {
+          const std::string& name = current_namespace_[depth];
+          if (name.compare(0, name.size(), desc, end - desc) == 0) {
+            continue;
+          }
+        }
+        // Otherwise we need to open a new namespace tag at this depth.
+        CloseNamespacesAboveDepth(depth);
+        info_.StartTag(DW_TAG_namespace);
+        std::string name(desc, end - desc);
+        WriteName(name.c_str());
+        current_namespace_.push_back(std::move(name));
+      }
+      CloseNamespacesAboveDepth(depth);
+      return std::string(desc, strchr(desc, ';') - desc);
+    }
+
+    // Close namespace tags to reach the given nesting depth.
+    void CloseNamespacesAboveDepth(size_t depth) {
+      DCHECK_LE(depth, current_namespace_.size());
+      while (current_namespace_.size() > depth) {
         info_.EndTag();
+        current_namespace_.pop_back();
       }
     }
 
     // For access to the ELF sections.
     DebugInfoWriter<ElfTypes>* owner_;
-    // Debug abbrevs for this compilation unit only.
-    std::vector<uint8_t> debug_abbrev_;
     // Temporary buffer to create and store the entries.
     DebugInfoEntryWriter<> info_;
     // Cache of already translated type descriptors.
@@ -1079,10 +1124,14 @@
     // 32-bit references which need to be resolved to a type later.
     // Given type may be used multiple times.  Therefore we need a multimap.
     std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
+    // The current set of open namespace tags which are active and not closed yet.
+    std::vector<std::string> current_namespace_;
   };
 
  public:
-  explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
+  explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder)
+      : builder_(builder),
+        debug_abbrev_(&debug_abbrev_buffer_) {
   }
 
   void Start() {
@@ -1099,25 +1148,26 @@
     writer.Write(types);
   }
 
-  void End() {
+  void End(bool write_oat_patches) {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches",
-                           ArrayRef<const uintptr_t>(debug_info_patches_));
-    builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
-    builder_->WriteSection(".debug_str", &debug_str_.Data());
-    builder_->WriteSection(".debug_loc", &debug_loc_);
-    builder_->WriteSection(".debug_ranges", &debug_ranges_);
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_info.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_info_patches_));
+    }
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_buffer_);
+    if (!debug_loc_.empty()) {
+      builder_->WriteSection(".debug_loc", &debug_loc_);
+    }
+    if (!debug_ranges_.empty()) {
+      builder_->WriteSection(".debug_ranges", &debug_ranges_);
+    }
   }
 
  private:
-  size_t WriteString(const char* str) {
-    return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1);
-  }
-
   ElfBuilder<ElfTypes>* builder_;
   std::vector<uintptr_t> debug_info_patches_;
-  DedupVector debug_abbrev_;
-  DedupVector debug_str_;
+  std::vector<uint8_t> debug_abbrev_buffer_;
+  DebugAbbrevWriter<> debug_abbrev_;
   std::vector<uint8_t> debug_loc_;
   std::vector<uint8_t> debug_ranges_;
 
@@ -1313,10 +1363,12 @@
     return buffer.size();
   }
 
-  void End() {
+  void End(bool write_oat_patches) {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches",
-                           ArrayRef<const uintptr_t>(debug_line_patches));
+    if (write_oat_patches) {
+      builder_->WritePatches(".debug_line.oat_patches",
+                             ArrayRef<const uintptr_t>(debug_line_patches));
+    }
   }
 
  private:
@@ -1326,7 +1378,8 @@
 
 template<typename ElfTypes>
 static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                               const ArrayRef<const MethodDebugInfo>& method_infos) {
+                               const ArrayRef<const MethodDebugInfo>& method_infos,
+                               bool write_oat_patches) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1350,7 +1403,7 @@
     for (auto& compilation_unit : compilation_units) {
       line_writer.WriteCompilationUnit(compilation_unit);
     }
-    line_writer.End();
+    line_writer.End(write_oat_patches);
   }
 
   // Write .debug_info section.
@@ -1360,7 +1413,7 @@
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
-    info_writer.End();
+    info_writer.End(write_oat_patches);
   }
 }
 
@@ -1440,13 +1493,14 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                    CFIFormat cfi_format) {
+                    CFIFormat cfi_format,
+                    bool write_oat_patches) {
   // Add methods to .symtab.
   WriteDebugSymbols(builder, method_infos, true /* with_signature */);
   // Generate CFI (stack unwinding information).
-  WriteCFISection(builder, method_infos, cfi_format, true /* write_oat_patches */);
+  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
   // Write DWARF .debug_* sections.
-  WriteDebugSections(builder, method_infos);
+  WriteDebugSections(builder, method_infos, write_oat_patches);
 }
 
 static void XzCompress(const std::vector<uint8_t>* src, std::vector<uint8_t>* dst) {
@@ -1527,10 +1581,12 @@
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
-  builder->Start();
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
   WriteDebugInfo(builder.get(),
                  ArrayRef<const MethodDebugInfo>(&method_info, 1),
-                 DW_DEBUG_FRAME_FORMAT);
+                 DW_DEBUG_FRAME_FORMAT,
+                 false /* write_oat_patches */);
   builder->End();
   CHECK(builder->Good());
   // Make a copy of the buffer.  We want to shrink it anyway.
@@ -1557,12 +1613,12 @@
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
-  builder->Start();
-
+  // No program headers since the ELF file is not linked and has no allocated sections.
+  builder->Start(false /* write_program_headers */);
   DebugInfoWriter<ElfTypes> info_writer(builder.get());
   info_writer.Start();
   info_writer.WriteTypes(types);
-  info_writer.End();
+  info_writer.End(false /* write_oat_patches */);
 
   builder->End();
   CHECK(builder->Good());
@@ -1586,11 +1642,13 @@
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat cfi_format);
+    CFIFormat cfi_format,
+    bool write_oat_patches);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat cfi_format);
+    CFIFormat cfi_format,
+    bool write_oat_patches);
 template void WriteMiniDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos);

diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e19da08..8e8472f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h

@@ -33,7 +33,8 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                    CFIFormat cfi_format);
+                    CFIFormat cfi_format,
+                    bool write_oat_patches);
 
 template <typename ElfTypes>
 void WriteMiniDebugInfo(ElfBuilder<ElfTypes>* builder,

diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 6bf080a..f2a95f2 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc

@@ -151,7 +151,7 @@
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
     // Generate all the debug information we can.
-    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
+    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat, true /* write_oat_patches */);
   }
   if (compiler_options_->GetGenerateMiniDebugInfo()) {
     // Generate only some information and compress it.

diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 60dfcfb..73574ba 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc

@@ -714,7 +714,7 @@
 
 class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
-  bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
     mirror::Class::ComputeName(hs.NewHandle(c));
     return true;
@@ -852,7 +852,7 @@
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
-  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!image_writer_->KeepClass(klass)) {
       classes_to_prune_.insert(klass);
     }

diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index eee6116..c307522 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc

@@ -1227,27 +1227,28 @@
     InductionVarRange::Value v1;
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
-    do {
-      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-        ValueRange index_range(GetGraph()->GetArena(),
-                               ValueBound(v1.instruction, v1.b_constant),
-                               ValueBound(v2.instruction, v2.b_constant));
-        // If analysis reveals a certain OOB, disable dynamic BCE.
-        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
-            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
-          *try_dynamic_bce = false;
-          return false;
+    if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) {
+      do {
+        if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+            v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+          DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+          DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+          ValueRange index_range(GetGraph()->GetArena(),
+                                 ValueBound(v1.instruction, v1.b_constant),
+                                 ValueBound(v2.instruction, v2.b_constant));
+          // If analysis reveals a certain OOB, disable dynamic BCE.
+          if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+              index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+            *try_dynamic_bce = false;
+            return false;
+          }
+          // Use analysis for static bce only if loop is finite.
+          if (!needs_finite_test && index_range.FitsIn(array_range)) {
+            return true;
+          }
         }
-        // Use analysis for static bce only if loop is finite.
-        if (!needs_finite_test && index_range.FitsIn(array_range)) {
-          return true;
-        }
-      }
-    } while (induction_range_.RefineOuter(&v1, &v2));
+      } while (induction_range_.RefineOuter(&v1, &v2));
+    }
     return false;
   }
 

diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a59024e..4179fab 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc

@@ -1556,21 +1556,13 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (use_acquire_release) {
-    // TODO(vixl): Let the MacroAssembler handle MemOperand.
-    __ Add(temp, class_reg, status_offset);
-    __ Ldar(temp, HeapOperand(temp));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-  } else {
-    __ Ldr(temp, HeapOperand(class_reg, status_offset));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-    __ Dmb(InnerShareable, BarrierReads);
-  }
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp, class_reg, status_offset);
+  __ Ldar(temp, HeapOperand(temp));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1716,9 +1708,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
-
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object FieldGet with Baker's read barrier case.
@@ -1736,26 +1726,15 @@
         offset,
         temp,
         /* needs_null_check */ true,
-        field_info.IsVolatile() && use_acquire_release);
-    if (field_info.IsVolatile() && !use_acquire_release) {
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-    }
+        field_info.IsVolatile());
   } else {
     // General case.
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        // Note that a potential implicit null check is handled in this
-        // CodeGeneratorARM64::LoadAcquire call.
-        // NB: LoadAcquire will record the pc info if needed.
-        codegen_->LoadAcquire(
-            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
-      } else {
-        codegen_->Load(field_type, OutputCPURegister(instruction), field);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        // For IRIW sequential consistency kLoadAny is not sufficient.
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      // Note that a potential implicit null check is handled in this
+      // CodeGeneratorARM64::LoadAcquire call.
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(
+          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1791,7 +1770,6 @@
   CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -1807,15 +1785,8 @@
     }
 
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-      } else {
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-        codegen_->Store(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);

diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index fa119bb..961fe62 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc

@@ -1132,11 +1132,11 @@
 }
 
 void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromCoreRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << MipsManagedRegister::FromFRegister(FRegister(reg));
+  stream << FRegister(reg);
 }
 
 void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5287,12 +5287,27 @@
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
 }
 
-void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMipsPointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+  }
+  __ LoadFromOffset(kLoadWord,
+                    locations->Out().AsRegister<Register>(),
+                    locations->InAt(0).AsRegister<Register>(),
+                    method_offset);
 }
 
 #undef __

diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3c928de..3e1563c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc

@@ -106,7 +106,7 @@
 }
 
 #define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
@@ -437,7 +437,7 @@
 
 #undef __
 #define __ down_cast<Mips64Assembler*>(GetAssembler())->
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
   // Ensure that we fix up branches.
@@ -486,12 +486,12 @@
 void ParallelMoveResolverMIPS64::RestoreScratch(int reg) {
   // Pop reg
   __ Ld(GpuRegister(reg), SP, 0);
-  __ DecreaseFrameSize(kMips64WordSize);
+  __ DecreaseFrameSize(kMips64DoublewordSize);
 }
 
 void ParallelMoveResolverMIPS64::SpillScratch(int reg) {
   // Push reg
-  __ IncreaseFrameSize(kMips64WordSize);
+  __ IncreaseFrameSize(kMips64DoublewordSize);
   __ Sd(GpuRegister(reg), SP, 0);
 }
 
@@ -503,7 +503,7 @@
   // automatically unspilled when the scratch scope object is destroyed).
   ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
   // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
-  int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0;
+  int stack_offset = ensure_scratch.IsSpilled() ? kMips64DoublewordSize : 0;
   __ LoadFromOffset(load_type,
                     GpuRegister(ensure_scratch.GetRegister()),
                     SP,
@@ -523,7 +523,9 @@
   return dwarf::Reg::Mips64Core(static_cast<int>(reg));
 }
 
-// TODO: mapping of floating-point registers to DWARF
+static dwarf::Reg DWARFReg(FpuRegister reg) {
+  return dwarf::Reg::Mips64Fp(static_cast<int>(reg));
+}
 
 void CodeGeneratorMIPS64::GenerateFrameEntry() {
   __ Bind(&frame_entry_label_);
@@ -562,7 +564,7 @@
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     GpuRegister reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sd(reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
@@ -571,9 +573,9 @@
   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
     FpuRegister reg = kFpuCalleeSaves[i];
     if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMips64WordSize;
+      ofs -= kMips64DoublewordSize;
       __ Sdc1(reg, SP, ofs);
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
+      __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
@@ -609,8 +611,8 @@
       FpuRegister reg = kFpuCalleeSaves[i];
       if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
         __ Ldc1(reg, SP, ofs);
-        ofs += kMips64WordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
+        ofs += kMips64DoublewordSize;
+        __ cfi().Restore(DWARFReg(reg));
       }
     }
 
@@ -618,7 +620,7 @@
       GpuRegister reg = kCoreCalleeSaves[i];
       if (allocated_registers_.ContainsCoreRegister(reg)) {
         __ Ld(reg, SP, ofs);
-        ofs += kMips64WordSize;
+        ofs += kMips64DoublewordSize;
         __ cfi().Restore(DWARFReg(reg));
       }
     }
@@ -976,7 +978,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     card,
                     TR,
-                    Thread::CardTableOffset<kMips64WordSize>().Int32Value());
+                    Thread::CardTableOffset<kMips64DoublewordSize>().Int32Value());
   __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
   __ Daddu(temp, card, temp);
   __ Sb(card, temp, 0);
@@ -994,10 +996,11 @@
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[RA] = true;
 
-  // AT and TMP(T8) are used as temporary/scratch registers
-  // (similar to how AT is used by MIPS assemblers).
+  // AT, TMP(T8) and TMP2(T3) are used as temporary/scratch
+  // registers (similar to how AT is used by MIPS assemblers).
   blocked_core_registers_[AT] = true;
   blocked_core_registers_[TMP] = true;
+  blocked_core_registers_[TMP2] = true;
   blocked_fpu_registers_[FTMP] = true;
 
   // Reserve suspend and thread registers.
@@ -1021,22 +1024,22 @@
 
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
   __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64WordSize;
+  return kMips64DoublewordSize;
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1051,7 +1054,7 @@
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
                                      SlowPathCode* slow_path) {
-  InvokeRuntime(GetThreadOffset<kMips64WordSize>(entrypoint).Int32Value(),
+  InvokeRuntime(GetThreadOffset<kMips64DoublewordSize>(entrypoint).Int32Value(),
                 instruction,
                 dex_pc,
                 slow_path);
@@ -1091,7 +1094,7 @@
   __ LoadFromOffset(kLoadUnsignedHalfword,
                     TMP,
                     TR,
-                    Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value());
+                    Thread::ThreadFlagsOffset<kMips64DoublewordSize>().Int32Value());
   if (successor == nullptr) {
     __ Bnezc(TMP, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -3014,7 +3017,7 @@
       invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // Set the hidden argument.
   __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -3190,7 +3193,7 @@
                         T9,
                         callee_method.AsRegister<GpuRegister>(),
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kMips64WordSize).Int32Value());
+                            kMips64DoublewordSize).Int32Value());
       // T9()
       __ Jalr(T9);
       __ Nop();
@@ -3228,7 +3231,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
 
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
@@ -3306,7 +3309,7 @@
 }
 
 static int32_t GetExceptionTlsOffset() {
-  return Thread::ExceptionOffset<kMips64WordSize>().Int32Value();
+  return Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value();
 }
 
 void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -3546,7 +3549,8 @@
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+    MemberOffset code_offset =
+        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
     __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);

diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 08e5615..c836f83 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h

@@ -27,10 +27,6 @@
 namespace art {
 namespace mips64 {
 
-// Use a local definition to prevent copying mistakes.
-static constexpr size_t kMips64WordSize = kMips64PointerSize;
-
-
 // InvokeDexCallingConvention registers
 
 static constexpr GpuRegister kParameterCoreRegisters[] =
@@ -274,9 +270,9 @@
 
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
 
-  size_t GetWordSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
 
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));

diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 18d70da..da054ba 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc

@@ -2688,6 +2688,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
+      } else if (add->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -2804,6 +2806,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
+      } else if (sub->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -2918,6 +2922,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
+      } else if (mul->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }
@@ -3415,6 +3421,8 @@
       locations->SetInAt(0, Location::RequiresFpuRegister());
       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
+      } else if (div->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresFpuRegister());
       } else {
         locations->SetInAt(1, Location::Any());
       }

diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index ae15fcf..9566c29 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc

@@ -93,7 +93,7 @@
   DCHECK(induction_analysis != nullptr);
 }
 
-void InductionVarRange::GetInductionRange(HInstruction* context,
+bool InductionVarRange::GetInductionRange(HInstruction* context,
                                           HInstruction* instruction,
                                           /*out*/Value* min_val,
                                           /*out*/Value* max_val,
@@ -111,12 +111,9 @@
     *min_val = GetVal(info, trip, in_body, /* is_min */ true);
     *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-  } else {
-    // No loop to analyze.
-    *min_val = Value();
-    *max_val = Value();
-    *needs_finite_test = false;
+    return true;
   }
+  return false;  // Nothing known
 }
 
 bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const {

diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 974b8fb..3cb7b4b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h

@@ -60,13 +60,13 @@
    * Given a context denoted by the first instruction, returns a possibly conservative
    * lower and upper bound on the instruction's value in the output parameters min_val
    * and max_val, respectively. The needs_finite_test flag denotes if an additional finite-test
-   * is needed to protect the range evaluation inside its loop.
+   * is needed to protect the range evaluation inside its loop. Returns false on failure.
    */
-  void GetInductionRange(HInstruction* context,
+  bool GetInductionRange(HInstruction* context,
                          HInstruction* instruction,
-                         /*out*/Value* min_val,
-                         /*out*/Value* max_val,
-                         /*out*/bool* needs_finite_test);
+                         /*out*/ Value* min_val,
+                         /*out*/ Value* max_val,
+                         /*out*/ bool* needs_finite_test);
 
   /** Refines the values with induction of next outer loop. Returns true on change. */
   bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const;
@@ -79,8 +79,8 @@
    */
   bool CanGenerateCode(HInstruction* context,
                        HInstruction* instruction,
-                       /*out*/bool* needs_finite_test,
-                       /*out*/bool* needs_taken_test);
+                       /*out*/ bool* needs_finite_test,
+                       /*out*/ bool* needs_taken_test);
 
   /**
    * Generates the actual code in the HIR for the lower and upper bound expressions on the
@@ -101,8 +101,8 @@
                          HInstruction* instruction,
                          HGraph* graph,
                          HBasicBlock* block,
-                         /*out*/HInstruction** lower,
-                         /*out*/HInstruction** upper);
+                         /*out*/ HInstruction** lower,
+                         /*out*/ HInstruction** upper);
 
   /**
    * Generates explicit taken-test for the loop in the given context. Code is generated in
@@ -113,7 +113,7 @@
   void GenerateTakenTest(HInstruction* context,
                          HGraph* graph,
                          HBasicBlock* block,
-                         /*out*/HInstruction** taken_test);
+                         /*out*/ HInstruction** taken_test);
 
  private:
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
@@ -168,17 +168,17 @@
                     HInstruction* instruction,
                     HGraph* graph,
                     HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper,
-                    /*out*/HInstruction** taken_test,
-                    /*out*/bool* needs_finite_test,
-                    /*out*/bool* needs_taken_test) const;
+                    /*out*/ HInstruction** lower,
+                    /*out*/ HInstruction** upper,
+                    /*out*/ HInstruction** taken_test,
+                    /*out*/ bool* needs_finite_test,
+                    /*out*/ bool* needs_taken_test) const;
 
   bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                     HInductionVarAnalysis::InductionInfo* trip,
                     HGraph* graph,
                     HBasicBlock* block,
-                    /*out*/HInstruction** result,
+                    /*out*/ HInstruction** result,
                     bool in_body,
                     bool is_min) const;
 

diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a839d2d..9b91b53 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc

@@ -392,8 +392,8 @@
       << invoke_instruction->DebugName();
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
-  if (graph_->GetInstructionSet() == kMips || graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips and mips64.
+  if (graph_->GetInstructionSet() == kMips64) {
+    // TODO: Support HClassTableGet for mips64.
     return false;
   }
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();

diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 7d3a723..c1e3863 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc

@@ -46,6 +46,10 @@
   bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
 
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
+  // `op` should be either HOr or HAnd.
+  // De Morgan's laws:
+  // ~a & ~b = ~(a | b)  and  ~a | ~b = ~(a & b)
+  bool TryDeMorganNegationFactoring(HBinaryOperation* op);
   void VisitShift(HBinaryOperation* shift);
 
   void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -164,6 +168,54 @@
   return true;
 }
 
+bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr()) << op->DebugName();
+  Primitive::Type type = op->GetType();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // We can apply De Morgan's laws if both inputs are Not's and are only used
+  // by `op`.
+  if (left->IsNot() &&
+      right->IsNot() &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    AND dst, nota, notb (respectively OR)
+    // with
+    //    OR or, a, b         (respectively AND)
+    //    NOT dst, or
+    HInstruction* src_left = left->AsNot()->GetInput();
+    HInstruction* src_right = right->AsNot()->GetInput();
+    uint32_t dex_pc = op->GetDexPc();
+
+    // Remove the negations on the inputs.
+    left->ReplaceWith(src_left);
+    right->ReplaceWith(src_right);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+
+    // Replace the `HAnd` or `HOr`.
+    HBinaryOperation* hbin;
+    if (op->IsAnd()) {
+      hbin = new (GetGraph()->GetArena()) HOr(type, src_left, src_right, dex_pc);
+    } else {
+      hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc);
+    }
+    HNot* hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc);
+
+    op->GetBlock()->InsertInstructionBefore(hbin, op);
+    op->GetBlock()->ReplaceAndRemoveInstructionWith(op, hnot);
+
+    RecordSimplification();
+    return true;
+  }
+
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HConstant* input_cst = instruction->GetConstantRight();
@@ -813,7 +865,10 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
   }
+
+  TryDeMorganNegationFactoring(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
@@ -1127,6 +1182,8 @@
     return;
   }
 
+  if (TryDeMorganNegationFactoring(instruction)) return;
+
   TryReplaceWithRotate(instruction);
 }
 
@@ -1249,6 +1306,26 @@
     return;
   }
 
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if (left->IsNot() &&
+      right->IsNot() &&
+      left->HasOnlyOneNonEnvironmentUse() &&
+      right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NOT nota, a
+    //    NOT notb, b
+    //    XOR dst, nota, notb
+    // with
+    //    XOR dst, a, b
+    instruction->ReplaceInput(left->AsNot()->GetInput(), 0);
+    instruction->ReplaceInput(right->AsNot()->GetInput(), 1);
+    left->GetBlock()->RemoveInstruction(left);
+    right->GetBlock()->RemoveInstruction(right);
+    RecordSimplification();
+    return;
+  }
+
   TryReplaceWithRotate(instruction);
 }
 

diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1376695..5dce83a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc

@@ -780,7 +780,6 @@
   Register offset = XRegisterFrom(offset_loc);  // Long offset.
   Location trg_loc = locations->Out();
   Register trg = RegisterFrom(trg_loc, type);
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
@@ -788,19 +787,11 @@
     Register temp = temps.AcquireW();
     codegen->GenerateArrayLoadWithBakerReadBarrier(
         invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
-    if (is_volatile && !use_acquire_release) {
-      __ Dmb(InnerShareable, BarrierReads);
-    }
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
     if (is_volatile) {
-      if (use_acquire_release) {
-        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
-      } else {
-        codegen->Load(type, trg, mem_op);
-        __ Dmb(InnerShareable, BarrierReads);
-      }
+      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
     } else {
       codegen->Load(type, trg, mem_op);
     }
@@ -914,8 +905,6 @@
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
   Register source = value;
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
   MemOperand mem_op(base.X(), offset);
 
   {
@@ -932,15 +921,7 @@
     }
 
     if (is_volatile || is_ordered) {
-      if (use_acquire_release) {
-        codegen->StoreRelease(type, source, mem_op);
-      } else {
-        __ Dmb(InnerShareable, BarrierAll);
-        codegen->Store(type, source, mem_op);
-        if (is_volatile) {
-          __ Dmb(InnerShareable, BarrierReads);
-        }
-      }
+      codegen->StoreRelease(type, source, mem_op);
     } else {
       codegen->Store(type, source, mem_op);
     }
@@ -1037,7 +1018,6 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
@@ -1078,43 +1058,20 @@
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  if (use_acquire_release) {
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-  } else {
-    // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
-    // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
-    // one, as the latter allows a preceding load to be delayed past
-    // the STXR instruction below.
-    __ Dmb(InnerShareable, BarrierAll);
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-    __ Dmb(InnerShareable, BarrierAll);
-  }
+  __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+  __ Cmp(tmp_value, expected);
+  __ B(&exit_loop, ne);
+  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+  __ Cbnz(tmp_32, &loop_head);
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 
@@ -1527,6 +1484,181 @@
   GenSignum(invoke->GetLocations(), /* is_long */ true,  GetVIXLAssembler());
 }
 
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+  DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCall,
+                                                                 kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          vixl::MacroAssembler* masm,
+                          CodeGeneratorARM64* codegen,
+                          QuickEntrypointEnum entry) {
+  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
+  __ Blr(lr);
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1542,24 +1674,6 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
-UNIMPLEMENTED_INTRINSIC(MathCos)
-UNIMPLEMENTED_INTRINSIC(MathSin)
-UNIMPLEMENTED_INTRINSIC(MathAcos)
-UNIMPLEMENTED_INTRINSIC(MathAsin)
-UNIMPLEMENTED_INTRINSIC(MathAtan)
-UNIMPLEMENTED_INTRINSIC(MathAtan2)
-UNIMPLEMENTED_INTRINSIC(MathCbrt)
-UNIMPLEMENTED_INTRINSIC(MathCosh)
-UNIMPLEMENTED_INTRINSIC(MathExp)
-UNIMPLEMENTED_INTRINSIC(MathExpm1)
-UNIMPLEMENTED_INTRINSIC(MathHypot)
-UNIMPLEMENTED_INTRINSIC(MathLog)
-UNIMPLEMENTED_INTRINSIC(MathLog10)
-UNIMPLEMENTED_INTRINSIC(MathNextAfter)
-UNIMPLEMENTED_INTRINSIC(MathSinh)
-UNIMPLEMENTED_INTRINSIC(MathTan)
-UNIMPLEMENTED_INTRINSIC(MathTanh)
-
 UNIMPLEMENTED_INTRINSIC(FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(FloatIsNaN)

diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cba84fa..f681d1f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc

@@ -1429,8 +1429,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize,
-                                            pStringCompareTo).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
   __ Jalr(TMP);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
@@ -1583,7 +1582,7 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
   __ Jalr(TMP);
   __ Nop();
 
@@ -1659,7 +1658,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromBytes).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1685,7 +1685,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromChars).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();
@@ -1716,7 +1717,8 @@
   __ LoadFromOffset(kLoadDoubleword,
                     TMP,
                     TR,
-                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value());
+                    QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+                                            pAllocStringFromString).Int32Value());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Jalr(TMP);
   __ Nop();

diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index c4492c8..9a97f54 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc

@@ -55,13 +55,13 @@
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsPhi() || use->IsInvoke() ||
+      if (use->IsPhi() || use->IsSelect() || use->IsInvoke() ||
           (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
           (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
-        // reference_ is merged to a phi, passed to a callee, or stored to heap.
+        // reference_ is merged to a phi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;

diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index c057eca..3dda850 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc

@@ -858,7 +858,6 @@
       // At the end of the loop pre-header, the corresponding value for instruction
       // is the first input of the phi.
       HInstruction* initial = instruction->AsPhi()->InputAt(0);
-      DCHECK(initial->GetBlock()->Dominates(loop_header));
       SetRawEnvAt(i, initial);
       initial->AddEnvUseAt(this, i);
     } else {

diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index de85729..fc66823 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc

@@ -198,8 +198,9 @@
 };
 static constexpr uint8_t expected_cfi_kMips64[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1,
-    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44,
+    0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
+    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: daddiu r29, r29, -40
 // 0x00000004: .cfi_def_cfa_offset: 40
@@ -210,7 +211,9 @@
 // 0x0000000c: sd r16, +16(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
 // 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
 // 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
 // 0x0000001c: sd r4, +0(r29)
@@ -218,7 +221,9 @@
 // 0x00000020: daddiu r29, r29, 24
 // 0x00000024: .cfi_def_cfa_offset: 40
 // 0x00000024: ldc1 f24, +0(r29)
+// 0x00000028: .cfi_restore: r56
 // 0x00000028: ldc1 f25, +8(r29)
+// 0x0000002c: .cfi_restore: r57
 // 0x0000002c: ld r16, +16(r29)
 // 0x00000030: .cfi_restore: r16
 // 0x00000030: ld r17, +24(r29)
@@ -427,9 +432,9 @@
 };
 static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28,
-    0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
-    0x40,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00,
+    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
+    0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: daddiu r29, r29, -40
 // 0x00000004: .cfi_def_cfa_offset: 40
@@ -440,7 +445,9 @@
 // 0x0000000c: sd r16, +16(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
 // 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: .cfi_offset: r57 at cfa-32
 // 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
 // 0x0000001c: sd r4, +0(r29)
@@ -454,7 +461,9 @@
 // 0x00020030: daddiu r29, r29, 24
 // 0x00020034: .cfi_def_cfa_offset: 40
 // 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: .cfi_restore: r56
 // 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: .cfi_restore: r57
 // 0x0002003c: ld r16, +16(r29)
 // 0x00020040: .cfi_restore: r16
 // 0x00020040: ld r17, +24(r29)

diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7ed3c84..1dd3508 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc

@@ -174,6 +174,38 @@
   ComputeLiveInAndLiveOutSets();
 }
 
+static void RecursivelyProcessInputs(HInstruction* current,      // Its inputs are visited.
+                                     HInstruction* actual_user,  // Forwarded to AddUse().
+                                     BitVector* live_in) {       // One bit set per recorded use.
+  for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+    HInstruction* input = current->InputAt(i);
+    bool has_in_location = current->GetLocations()->InAt(i).IsValid();
+    bool has_out_location = input->GetLocations()->Out().IsValid();
+
+    if (has_in_location) {
+      DCHECK(has_out_location)
+          << "Instruction " << current->DebugName() << current->GetId()
+          << " expects an input value at index " << i << " but "
+          << input->DebugName() << input->GetId() << " does not produce one.";
+      DCHECK(input->HasSsaIndex());
+      // `input` generates a result used by `current`. Add the use (attributed
+      // to `actual_user`) and mark `input` in the live-in set.
+      input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user);
+      live_in->SetBit(input->GetSsaIndex());
+    } else if (has_out_location) {
+      // `input` generates a result but `current` does not use it: nothing to record.
+    } else {
+      // `input` is inlined into `current`. Walk over its inputs, recording each
+      // use with `input` as the owner and the unchanged `actual_user` as the user.
+      DCHECK(input->IsEmittedAtUseSite());
+      // Check that the inlined input is not a phi. Recursing on loop phis could
+      // lead to an infinite loop.
+      DCHECK(!input->IsPhi());
+      RecursivelyProcessInputs(input, actual_user, live_in);
+    }
+  }
+}
+
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
@@ -261,35 +293,7 @@
           DCHECK(!current->HasEnvironmentUses());
         }
       } else {
-        for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-          HInstruction* input = current->InputAt(i);
-          bool has_in_location = current->GetLocations()->InAt(i).IsValid();
-          bool has_out_location = input->GetLocations()->Out().IsValid();
-
-          if (has_in_location) {
-            DCHECK(has_out_location);
-            DCHECK(input->HasSsaIndex());
-            // `Input` generates a result used by `current`. Add use and update
-            // the live-in set.
-            input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
-            live_in->SetBit(input->GetSsaIndex());
-          } else if (has_out_location) {
-            // `Input` generates a result but it is not used by `current`.
-          } else {
-            // `Input` is inlined into `current`. Walk over its inputs and record
-            // uses at `current`.
-            DCHECK(input->IsEmittedAtUseSite());
-            for (size_t i2 = 0, e2 = input->InputCount(); i2 < e2; ++i2) {
-              HInstruction* inlined_input = input->InputAt(i2);
-              DCHECK(inlined_input->HasSsaIndex()) << "Recursive inlining not allowed.";
-              if (input->GetLocations()->InAt(i2).IsValid()) {
-                live_in->SetBit(inlined_input->GetSsaIndex());
-                inlined_input->GetLiveInterval()->AddUse(
-                    /* owner */ input, /* environment */ nullptr, i2, /* actual_user */ current);
-              }
-            }
-          }
-        }
+        RecursivelyProcessInputs(current, current, live_in);
       }
     }
 

diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index f9ff2df..ab480ca 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc

@@ -300,10 +300,17 @@
   EmitRtd(0x1f, rt, rd, 0x5, 0x24);
 }
 
-void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) {
-  DCHECK(0 <= pos && pos < 32) << pos;
-  DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one;
-  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3);
+void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size) {  // size, not size-1
+  CHECK(IsUint<5>(pos)) << pos;  // pos must be in [0, 31].
+  CHECK(IsUint<5>(size - 1)) << size;  // size must be in [1, 32].
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size - 1), pos, 0x3);  // Field holds size - 1.
+}
+
+void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) {
+  CHECK(IsUint<5>(pos - 32)) << pos;  // pos must be in [32, 63].
+  CHECK(IsUint<5>(size - 1)) << size;  // size must be in [1, 32].
+  CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size;  // pos + size must be at most 64.
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6);
 }
 
 void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) {
@@ -311,22 +318,22 @@
 }
 
 void Mips64Assembler::Sc(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));  // Signed 9-bit offset: [-256, 255], as in the check it replaces.
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x26);
 }
 
 void Mips64Assembler::Scd(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));  // Signed 9-bit offset: [-256, 255], as in the check it replaces.
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x27);
 }
 
 void Mips64Assembler::Ll(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));  // Signed 9-bit offset: [-256, 255], as in the check it replaces.
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x36);
 }
 
 void Mips64Assembler::Lld(GpuRegister rt, GpuRegister base, int16_t imm9) {
-  DCHECK((-256 <= imm9) && (imm9 < 256));
+  CHECK(IsInt<9>(imm9));  // Signed 9-bit offset: [-256, 255], as in the check it replaces.
   EmitI(0x1f, base, rt, ((imm9 & 0x1FF) << 7) | 0x37);
 }
 
@@ -967,10 +974,18 @@
   EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mfhc1(GpuRegister rt, FpuRegister fs) {  // MFHC1: high word of fs -> rt.
+  EmitFR(0x11, 0x03, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
+void Mips64Assembler::Mthc1(GpuRegister rt, FpuRegister fs) {  // MTHC1: rt -> high word of fs.
+  EmitFR(0x11, 0x07, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
 void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
@@ -1787,11 +1802,13 @@
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1808,32 +1825,51 @@
       Lhu(reg, base, offset);
       break;
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lw(reg, base, offset);
       break;
     case kLoadUnsignedWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwu(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ld(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        Lwu(TMP2, base, offset + kMips64WordSize);
+        Dinsu(reg, TMP2, 32, 32);
+      } else {
+        Ld(reg, base, offset);
+      }
       break;
   }
 }
 
 void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
                                         int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kLoadWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Lwc1(reg, base, offset);
       break;
     case kLoadDoubleword:
-      Ldc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        Lw(TMP2, base, offset + kMips64WordSize);
+        Mthc1(TMP2, reg);
+      } else {
+        Ldc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1869,11 +1905,13 @@
 
 void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
@@ -1884,10 +1922,18 @@
       Sh(reg, base, offset);
       break;
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Sw(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sd(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        Dsrl32(TMP2, reg, 0);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sd(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -1896,19 +1942,29 @@
 
 void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset)) {
-    LoadConst32(AT, offset);
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
     Daddu(AT, AT, base);
     base = AT;
-    offset = 0;
+    offset &= (kMips64DoublewordSize - 1);
   }
 
   switch (type) {
     case kStoreWord:
+      CHECK_ALIGNED(offset, kMips64WordSize);
       Swc1(reg, base, offset);
       break;
     case kStoreDoubleword:
-      Sdc1(reg, base, offset);
+      if (!IsAligned<kMips64DoublewordSize>(offset)) {
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Mfhc1(TMP2, reg);
+        Swc1(reg, base, offset);
+        Sw(TMP2, base, offset + kMips64WordSize);
+      } else {
+        Sdc1(reg, base, offset);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -2053,7 +2109,7 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2062,7 +2118,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2080,7 +2136,7 @@
 }
 
 void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
-                                       ThreadOffset<kMipsDoublewordSize> src,
+                                       ThreadOffset<kMips64DoublewordSize> src,
                                        size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
@@ -2102,7 +2158,7 @@
     // Negate the 32-bit ref
     Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
     // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31);
+    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
   }
 }
 
@@ -2115,7 +2171,7 @@
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<kMipsDoublewordSize> offs) {
+                                             ThreadOffset<kMips64DoublewordSize> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2160,7 +2216,7 @@
 }
 
 void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<kMipsDoublewordSize> thr_offs,
+                                             ThreadOffset<kMips64DoublewordSize> thr_offs,
                                              ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
@@ -2168,7 +2224,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -2372,7 +2428,7 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset ATTRIBUTE_UNUSED,
                                        ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
@@ -2392,7 +2448,7 @@
   LoadFromOffset(kLoadDoubleword,
                  scratch.AsGpuRegister(),
                  S1,
-                 Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+                 Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value());
   Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
@@ -2409,7 +2465,7 @@
   LoadFromOffset(kLoadDoubleword,
                  T9,
                  S1,
-                 QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+                 QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pDeliverException).Int32Value());
   Jr(T9);
   Nop();
 

diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 3262640..71f5e00 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h

@@ -31,7 +31,8 @@
 namespace art {
 namespace mips64 {
 
-static constexpr size_t kMipsDoublewordSize = 8;
+static constexpr size_t kMips64WordSize = 4;
+static constexpr size_t kMips64DoublewordSize = 8;
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -151,7 +152,8 @@
   void Seh(GpuRegister rd, GpuRegister rt);
   void Dsbh(GpuRegister rd, GpuRegister rt);
   void Dshd(GpuRegister rd, GpuRegister rt);
-  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
+  void Dext(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
+  void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
   void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -301,7 +303,9 @@
   void Cvtdl(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mfhc1(GpuRegister rt, FpuRegister fs);
   void Mtc1(GpuRegister rt, FpuRegister fs);
+  void Mthc1(GpuRegister rt, FpuRegister fs);
   void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
@@ -378,10 +382,10 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -390,7 +394,7 @@
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
   void LoadFromThread64(ManagedRegister mdest,
-                        ThreadOffset<kMipsDoublewordSize> src,
+                        ThreadOffset<kMips64DoublewordSize> src,
                         size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
@@ -401,15 +405,15 @@
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
   void LoadRawPtrFromThread64(ManagedRegister mdest,
-                              ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
+                              ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -466,7 +470,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+  void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset,
                         ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null

diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 7d79be2..b758d64 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc

@@ -543,6 +543,30 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Mfc1) {  // Mfc1 takes (GPR rt, FPR fs); RepeatRF presumably pairs them.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mfhc1) {  // Mfhc1 takes (GPR rt, FPR fs), like Mfc1.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mtc1) {  // Mtc1 takes (GPR rt, FPR fs); RepeatRF presumably pairs them.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Mthc1) {  // Mthc1 takes (GPR rt, FPR fs), like Mtc1.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmfc1) {  // Dmfc1 takes (GPR rt, FPR fs), like Mfc1.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmfc1, "dmfc1 ${reg1}, ${reg2}"), "Dmfc1");
+}
+
+TEST_F(AssemblerMIPS64Test, Dmtc1) {  // Dmtc1 takes (GPR rt, FPR fs), like Mtc1.
+  DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
@@ -827,6 +851,44 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd");
 }
 
+TEST_F(AssemblerMIPS64Test, Dext) {  // Every (rt, rs, pos, size) form that Dext() accepts.
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 32 * 32);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 0; pos < 32; pos++) {  // 32 positions ...
+        for (int32_t size = 1; size <= 32; size++) {  // ... times 32 sizes per register pair.
+          __ Dext(*reg1, *reg2, pos, size);
+          expected << "dext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dext");
+}
+
+TEST_F(AssemblerMIPS64Test, Dinsu) {  // Every (rt, rs, pos, size) form that Dinsu() accepts.
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * 33 * 16);
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int32_t pos = 32; pos < 64; pos++) {
+        for (int32_t size = 1; pos + size <= 64; size++) {  // 32 + 31 + ... + 1 = 33 * 16 pairs.
+          __ Dinsu(*reg1, *reg2, pos, size);
+          expected << "dinsu $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+        }
+      }
+    }
+  }
+
+  DriverStr(expected.str(), "Dinsu");
+}
+
 TEST_F(AssemblerMIPS64Test, Wsbh) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
 }
@@ -942,4 +1004,638 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo");
 }
 
+TEST_F(AssemblerMIPS64Test, LoadFromOffset) {  // 13 offsets for each of 7 load types.
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A0, 0);  // dest == base
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x8001);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 2);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 4);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 1000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =  // One group per load type, in call order.
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 1($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "lb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"  // offset 0x8000: expected to go through $at
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 1($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "lbu $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 2($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "lh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 2($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "lhu $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 4($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "lw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lwu $a0, 0($a0)\n"
+      "lwu $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"
+      "lwu $a0, 256($a1)\n"
+      "lwu $a0, 1000($a1)\n"
+      "lwu $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+      "lwu $a0, -256($a1)\n"
+      "lwu $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 0($at)\n"
+
+      "ld $a0, 0($a0)\n"
+      "ld $a0, 0($a1)\n"
+      "lwu $a0, 4($a1)\n"  // offset 4: expects lwu + lwu + dins instead of ld
+      "lwu $t3, 8($a1)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ld $a0, 256($a1)\n"
+      "ld $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"  // offset 0x7FFC: $at = base + 0x7FF8, then 4 and 8
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dins $a0, $t3, 32, 32\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n"
+      "ld $a0, -256($a1)\n"
+      "ld $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "ld $a0, 0($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {  // 11 offsets, word then doubleword.
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =  // Word group first, then doubleword group, in call order.
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "lwc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"  // offset 4: expects lwc1 + lw + mthc1 instead of ldc1
+      "lw $t3, 8($a0)\n"
+      "mthc1 $t3, $f0\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "lwc1 $f0, 4($at)\n"
+      "lw $t3, 8($at)\n"
+      "mthc1 $t3, $f0\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadFpuFromOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreToOffset) {  // 13 offsets for each of 4 store types.
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A0, 0);  // source == base
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x7FFF);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x8001);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreByte, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 2);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x7FFE);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x8002);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreWord, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A0, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 4);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 1000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFC);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x8004);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x12345678);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+
+  const char* expected =  // One group per store type, in call order.
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 1($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "sb $a0, 0x7FFF($a1)\n"
+      "ori $at, $zero, 0x8000\n"  // offset 0x8000: expected to go through $at
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 1($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 2($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "sh $a0, 0x7FFE($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 2($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "sw $a0, 0x7FFC($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sd $a0, 0($a0)\n"
+      "sd $a0, 0($a1)\n"
+      "sw $a0, 4($a1)\n"  // offset 4: expects sw + dsrl32 + sw instead of sd
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($a1)\n"
+      "sd $a0, 256($a1)\n"
+      "sd $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x7FF8\n"  // offset 0x7FFC: $at = base + 0x7FF8, then 4 and 8
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a1\n"
+      "sw $a0, 4($at)\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n"
+      "sd $a0, -256($a1)\n"
+      "sd $a0, -32768($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a1\n"
+      "sd $a0, 0($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {  // 11 offsets, word then doubleword.
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreWord, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x8004);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+
+  const char* expected =  // Word group first, then doubleword group, in call order.
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "swc1 $f0, 0x7FFC($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 4($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+
+      "sdc1 $f0, 0($a0)\n"
+      "mfhc1 $t3, $f0\n"  // offset 4: expects mfhc1 + swc1 + sw instead of sdc1
+      "swc1 $f0, 4($a0)\n"
+      "sw $t3, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x7FF8\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "ori $at, $zero, 0x8000\n"
+      "daddu $at, $at, $a0\n"
+      "mfhc1 $t3, $f0\n"
+      "swc1 $f0, 4($at)\n"
+      "sw $t3, 8($at)\n"
+      "lui $at, 1\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, -32768($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "daddu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreFpuToOffset");
+}
+
+#undef __
+
 }  // namespace art

diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 8e80961..d30f697 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc

@@ -126,6 +126,11 @@
       continue;
     }
 
+    // The image format is dropped.
+    if (StartsWith(original_argv[i], "--image-format=")) {
+      continue;
+    }
+
     // This should leave any dex-file and oat-file options, describing what we compiled.
 
     // However, we prefer to drop this when we saw --zip-fd.
@@ -928,16 +933,25 @@
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
 
-    // Automatically force determinism for the boot image in a host
-    // build, except when read barriers are enabled, as the former
-    // switches the GC to a non-concurrent one by passing the
-    // option `-Xgc:nonconcurrent` (see below).
-    if (!kIsTargetBuild && IsBootImage() && !kEmitCompilerReadBarrier) {
-      force_determinism_ = true;
+    // Automatically force determinism for the boot image in a host build if the default GC is CMS
+    // or MS and read barriers are not enabled, since forcing determinism switches the GC to a
+    // non-concurrent one by passing the option `-Xgc:nonconcurrent` (see below).
+    if (!kIsTargetBuild && IsBootImage()) {
+      if (SupportsDeterministicCompilation()) {
+        force_determinism_ = true;
+      } else {
+        LOG(WARNING) << "Deterministic compilation is disabled.";
+      }
     }
     compiler_options_->force_determinism_ = force_determinism_;
   }
 
+  static bool SupportsDeterministicCompilation() {  // Default GC is CMS/MS, no read barriers.
+    const bool default_gc_is_cms_or_ms = gc::kCollectorTypeDefault == gc::kCollectorTypeCMS ||
+        gc::kCollectorTypeDefault == gc::kCollectorTypeMS;
+    return default_gc_is_cms_or_ms && !kEmitCompilerReadBarrier;
+  }
+
   void ExpandOatAndImageFilenames() {
     std::string base_oat = oat_filenames_[0];
     size_t last_oat_slash = base_oat.rfind('/');
@@ -1183,8 +1197,8 @@
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
       } else if (option == "--force-determinism") {
-        if (kEmitCompilerReadBarrier) {
-          Usage("Cannot use --force-determinism with read barriers");
+        if (!SupportsDeterministicCompilation()) {
+          Usage("Cannot use --force-determinism with read barriers or non-CMS garbage collector");
         }
         force_determinism_ = true;
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {

diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7bf6d21..288f95e 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk

@@ -263,7 +263,8 @@
   arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
-  interpreter/mterp/mterp_stub.cc \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm64.S \
   arch/arm64/context_arm64.cc \
   arch/arm64/entrypoints_init_arm64.cc \
   arch/arm64/jni_entrypoints_arm64.S \
@@ -508,6 +509,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
+    LOCAL_CLANG_arm64 := true
     LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
   else # host

diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 5c8ff8f..4db9411 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc

@@ -113,6 +113,25 @@
   qpoints->pShrLong = nullptr;
   qpoints->pUshrLong = nullptr;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;

diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h

@@ -66,14 +66,6 @@
       return fix_cortex_a53_843419_;
   }
 
-  // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
-  // should prefer the Acquire-Release semantics over the explicit DMBs when
-  // handling load/store-volatile. For a specific use case see the ARM64
-  // Optimizing backend.
-  bool PreferAcquireRelease() const {
-    return true;
-  }
-
   virtual ~Arm64InstructionSetFeatures() {}
 
  protected:

diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc

@@ -30,8 +30,6 @@
   EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
   EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
   EXPECT_EQ(arm64_features->AsBitmap(), 3U);
-  // See the comments in instruction_set_features_arm64.h.
-  EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
 }
 
 }  // namespace art

diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 66c8aad..d264c9b 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S

@@ -1366,7 +1366,106 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation; any failed check branches to the slow path below.
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size, then rosalloc bracket index
+    # t2: rosalloc run
+    # t3: thread local allocation stack top
+    # a4: thread local allocation stack end
+    # v0: free list head
+    #
+    # a5, a6 : temps
+
+    ld     $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1)   # Load dex cache resolved types array.
+
+    dsll   $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT        # Scale type_idx to a byte offset.
+    daddu  $a5, $t0, $a5                                    # Compute the entry address.
+    lwu    $t0, 0($a5)                                      # Load class (t0).
+    beqzc  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li     $a6, MIRROR_CLASS_STATUS_INITIALIZED
+    lwu    $a5, MIRROR_CLASS_STATUS_OFFSET($t0)             # Check class status.
+    bnec   $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor    $a5, $a5, $a5
+    daddu  $t0, $t0, $a5
+
+    lwu    $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)       # Check if access flags has
+    li     $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE           # kAccClassIsFinalizable.
+    and    $a6, $a5, $a6
+    bnezc  $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ld     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)    # Check if thread local allocation stack
+    ld     $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)    # has any room left.
+    bgeuc  $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lwu    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)        # Load object size (t1).
+    li     $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE      # Check if size is for a thread local
+                                                            # allocation.
+    bltuc  $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size. Align up the size by the rosalloc bracket
+    # quantum size, divide by the quantum size and subtract 1; (size - 1) >> shift does all three.
+    daddiu $t1, $t1, -1                                     # Decrease obj size and shift right by
+    dsrl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT    # quantum.
+
+    dsll   $t2, $t1, POINTER_SIZE_SHIFT
+    daddu  $t2, $t2, $s1
+    ld     $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)            # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+    ld     $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqzc  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+    ld     $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sd     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the first pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw     $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+    sw     $v0, 0($t3)                                      # Slot is one compressed ref wide.
+    daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sd     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+    lw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu  $a5, $a5, -1
+    sw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                         # Fence.
+
+    jalr   $zero, $ra
+    .cpreturn                                    # Restore gp from t8 in branch delay slot.
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    jal    artAllocObjectFromCodeRosAlloc
+    move   $a2 ,$s1                              # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an

diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 1d07d47..b027c95 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h

@@ -61,6 +61,7 @@
   RA   = 31,  // Return address.
   TR   = S1,  // ART Thread Register
   TMP  = T8,  // scratch register (in addition to AT)
+  TMP2 = T3,  // scratch register (in addition to AT, reserved for assembler)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };

diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 74eb722..28540c8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h

@@ -41,17 +41,15 @@
 
 namespace art {
 
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClassUnchecked() {
   GcRootSource gc_root_source(this);
-  return declaring_class_.Read(&gc_root_source);
+  return declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
 }
 
-inline mirror::Class* ArtMethod::GetDeclaringClassNoBarrier() {
-  return declaring_class_.Read<kWithoutReadBarrier>();
-}
-
+template <ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtMethod::GetDeclaringClass() {
-  mirror::Class* result = GetDeclaringClassUnchecked();
+  mirror::Class* result = GetDeclaringClassUnchecked<kReadBarrierOption>();
   if (kIsDebugBuild) {
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
@@ -79,24 +77,28 @@
 
 // AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
 // TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
-ALWAYS_INLINE
-static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
-        method->GetDeclaringClass()->IsErroneous());
+template <ReadBarrierOption kReadBarrierOption>
+ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
+    NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
 }
 
+template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetAccessFlags() {
   if (kIsDebugBuild) {
     Thread* self = Thread::Current();
     if (!Locks::mutator_lock_->IsSharedHeld(self)) {
       ScopedObjectAccess soa(self);
-      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-            GetDeclaringClass()->IsErroneous());
+      CHECK(IsRuntimeMethod() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+            GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
     } else {
       // We cannot use SOA in this case. We might be holding the lock, but may not be in the
       // runnable state (e.g., during GC).
       Locks::mutator_lock_->AssertSharedHeld(self);
-      DoGetAccessFlagsHelper(this);
+      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
   return access_flags_;
@@ -469,7 +471,7 @@
 
 template <typename Visitor>
 inline void ArtMethod::UpdateObjectsForImageRelocation(const Visitor& visitor) {
-  mirror::Class* old_class = GetDeclaringClassNoBarrier();
+  mirror::Class* old_class = GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   mirror::Class* new_class = visitor(old_class);
   if (old_class != new_class) {
     SetDeclaringClass(new_class);
@@ -486,9 +488,9 @@
   }
 }
 
-template <typename Visitor>
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor) {
-  if (IsNative()) {
+  if (IsNative<kReadBarrierOption>()) {
     const void* old_native_code = GetEntryPointFromJni();
     const void* new_native_code = visitor(old_native_code);
     if (old_native_code != new_native_code) {

diff --git a/runtime/art_method.h b/runtime/art_method.h
index a020e9d..078a978 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h

@@ -57,11 +57,10 @@
                                         jobject jlr_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE mirror::Class* GetDeclaringClassNoBarrier()
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -77,6 +76,7 @@
 
   // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
   // a proxy method.
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
@@ -154,8 +154,9 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
-    return (GetAccessFlags() & kAccNative) != 0;
+    return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
   }
 
   bool IsFastNative() {
@@ -485,7 +486,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update entry points by passing them through the visitor.
-  template <typename Visitor>
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
   ALWAYS_INLINE void UpdateEntrypoints(const Visitor& visitor);
 
  protected:

diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index ad03c31..a53dcea 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h

@@ -156,6 +156,20 @@
   }
 };
 
+// 32-bit FNV-1a style hash functor, suitable as the hasher of std::unordered_map.
+// Works with any container usable in a range-based for loop; each element (not each byte) is mixed.
+// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+template <typename Vector>
+struct FNVHash {
+  size_t operator()(const Vector& vector) const {
+    uint32_t hash = 2166136261u;  // 32-bit FNV offset basis.
+    for (const auto& value : vector) {
+      hash = (hash ^ value) * 16777619u;  // XOR first, then multiply by the 32-bit FNV prime.
+    }
+    return hash;  // Converted to size_t; the value itself never exceeds 32 bits.
+  }
+};
+
 // Use to suppress type deduction for a function argument.
 // See std::identity<> for more background:
 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers

diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0667e23..88d49b2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc

@@ -1182,11 +1182,15 @@
   ClassTable* const table_;
 };
 
-void ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
     Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-    bool added_class_table) {
+    bool added_class_table,
+    bool* out_forward_dex_cache_array,
+    std::string* out_error_msg) {
+  DCHECK(out_forward_dex_cache_array != nullptr);
+  DCHECK(out_error_msg != nullptr);
   Thread* const self = Thread::Current();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const ImageHeader& header = space->GetImageHeader();
@@ -1194,8 +1198,11 @@
   // class loader fields.
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
-  // TODO: Store class table in the image to avoid manually adding the classes.
-  for (int32_t i = 0, num_dex_caches = dex_caches->GetLength(); i < num_dex_caches; i++) {
+  // Dex cache array fixup is all or nothing: reject app images in which only some dex caches have
+  // bss arrays, since we rely on clobbering the dex cache arrays in the image to forward to bss.
+  size_t num_dex_caches_with_bss_arrays = 0;
+  const size_t num_dex_caches = dex_caches->GetLength();
+  for (size_t i = 0; i < num_dex_caches; i++) {
     mirror::DexCache* const dex_cache = dex_caches->Get(i);
     const DexFile* const dex_file = dex_cache->GetDexFile();
     // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and
@@ -1209,22 +1216,22 @@
     CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
     CHECK_EQ(num_methods, dex_cache->NumResolvedMethods());
     CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
-    if (dex_file->GetOatDexFile() != nullptr &&
-        dex_file->GetOatDexFile()->GetDexCacheArrays() != nullptr) {
+    const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
+    if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
+      ++num_dex_caches_with_bss_arrays;
       DexCacheArraysLayout layout(image_pointer_size_, dex_file);
-      uint8_t* const raw_arrays = dex_file->GetOatDexFile()->GetDexCacheArrays();
-      // The space is not yet visible to the GC, we can avoid the read barriers and use
-      // std::copy_n.
+      uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
+      // The space is not yet visible to the GC, we can avoid the read barriers and use std::copy_n.
       if (num_strings != 0u) {
+        GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings();
         GcRoot<mirror::String>* const strings =
             reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
         for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) {
           DCHECK(strings[j].IsNull());
         }
-        std::copy_n(dex_cache->GetStrings(), num_strings, strings);
+        std::copy_n(image_resolved_strings, num_strings, strings);
         dex_cache->SetStrings(strings);
       }
-
       if (num_types != 0u) {
         GcRoot<mirror::Class>* const image_resolved_types = dex_cache->GetResolvedTypes();
         GcRoot<mirror::Class>* const types =
@@ -1282,6 +1289,12 @@
           // Update the class loader from the one in the image class loader to the one that loaded
           // the app image.
           klass->SetClassLoader(class_loader.Get());
+          // The resolved type could be from another dex cache, go through the dex cache just in
+          // case. May be null for array classes.
+          if (klass->GetDexCacheStrings() != nullptr) {
+            DCHECK(!klass->IsArrayClass());
+            klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+          }
           // If there are multiple dex caches, there may be the same class multiple times
           // in different dex caches. Check for this since inserting will add duplicates
           // otherwise.
@@ -1326,7 +1339,6 @@
               CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
             }
           }
-          DCHECK_EQ(klass->GetClassLoader(), class_loader.Get());
           if (kIsDebugBuild) {
             for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
               const void* code = m.GetEntryPointFromQuickCompiledCode();
@@ -1354,20 +1366,68 @@
       }
     }
   }
-  {
+  *out_forward_dex_cache_array = num_dex_caches_with_bss_arrays != 0;
+  if (*out_forward_dex_cache_array) {
+    if (num_dex_caches_with_bss_arrays != num_dex_caches) {
+      // Reject application image since we cannot forward only some of the dex cache arrays.
+      // TODO: We could get around this by having a dedicated forwarding slot. It should be an
+      // uncommon case.
+      *out_error_msg = StringPrintf("Dex caches in bss does not match total: %zu vs %zu",
+                                    num_dex_caches_with_bss_arrays,
+                                    num_dex_caches);
+      return false;
+    }
     FixupArtMethodArrayVisitor visitor(header);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor, space->Begin(), sizeof(void*));
+        &visitor,
+        space->Begin(),
+        sizeof(void*));
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
   }
   if (kIsDebugBuild) {
     ClassTable* const class_table = class_loader.Get()->GetClassTable();
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
     header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor2, space->Begin(), sizeof(void*));
+        &visitor2,
+        space->Begin(),
+        sizeof(void*));
   }
+  return true;
 }
 
+class UpdateClassLoaderAndResolvedStringsVisitor {
+ public:
+  UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
+                                             mirror::ClassLoader* class_loader,
+                                             bool forward_strings)
+      : space_(space),
+        class_loader_(class_loader),
+        forward_strings_(forward_strings) {}
+
+  bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (forward_strings_) {  // Re-point the class's strings array only when requested.
+      GcRoot<mirror::String>* strings = klass->GetDexCacheStrings();
+      if (strings != nullptr) {  // Classes without a strings array are left as they are.
+        DCHECK(
+            space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
+                reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
+            << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
+        // The class's dex cache was already updated, so adopt its current strings pointer.
+        GcRoot<mirror::String>* new_strings = klass->GetDexCache()->GetStrings();
+        DCHECK_NE(strings, new_strings);
+        klass->SetDexCacheStrings(new_strings);
+      }
+    }
+    // Always point the class at the supplied class loader.
+    klass->SetClassLoader(class_loader_);
+    return true;  // Never stops the visit early.
+  }
+
+  gc::space::ImageSpace* const space_;  // Only read by the DCHECK in operator().
+  mirror::ClassLoader* const class_loader_;  // Loader assigned to every visited class.
+  const bool forward_strings_;  // Whether operator() also updates the strings pointer.
+};
+
 bool ClassLinker::AddImageSpace(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1576,21 +1636,55 @@
   if (app_image) {
     GetOrCreateAllocatorForClassLoader(class_loader.Get());  // Make sure we have a linear alloc.
   }
-  if (class_table_section.Size() > 0u) {
-    const uint64_t start_time2 = NanoTime();
+  ClassTable* class_table = nullptr;
+  {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader.Get());
-    class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
-    if (app_image) {
-      class_table->SetClassLoader(class_loader.Get());
-    } else {
-      dex_cache_boot_image_class_lookup_required_ = false;
+    class_table = InsertClassTableForClassLoader(class_loader.Get());
+    if (class_table_section.Size() > 0u) {
+      const uint64_t start_time2 = NanoTime();
+      class_table->ReadFromMemory(space->Begin() + class_table_section.Offset());
+      if (!app_image) {
+        dex_cache_boot_image_class_lookup_required_ = false;
+      }
+      VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
+      added_class_table = true;
     }
-    VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
-    added_class_table = true;
   }
   if (app_image) {
-    UpdateAppImageClassLoadersAndDexCaches(space, class_loader, dex_caches, added_class_table);
+    bool forward_dex_cache_arrays = false;
+    if (!UpdateAppImageClassLoadersAndDexCaches(space,
+                                                class_loader,
+                                                dex_caches,
+                                                added_class_table,
+                                                /*out*/&forward_dex_cache_arrays,
+                                                /*out*/error_msg)) {
+      return false;
+    }
+    if (added_class_table) {
+      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+      // Update class loader and resolved strings. If added_class_table is false, the resolved
+      // strings were already updated in UpdateAppImageClassLoadersAndDexCaches.
+      UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
+                                                         class_loader.Get(),
+                                                         forward_dex_cache_arrays);
+      class_table->Visit(visitor);
+    }
+    // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
+    // In this case, madvise away the dex cache arrays section of the image to reduce RAM usage and
+    // mark as PROT_NONE to catch any invalid accesses.
+    if (forward_dex_cache_arrays) {
+      const ImageSection& dex_cache_section = header.GetImageSection(
+          ImageHeader::kSectionDexCacheArrays);
+      uint8_t* section_begin = AlignUp(space->Begin() + dex_cache_section.Offset(), kPageSize);
+      uint8_t* section_end = AlignDown(space->Begin() + dex_cache_section.End(), kPageSize);
+      if (section_begin < section_end) {
+        madvise(section_begin, section_end - section_begin, MADV_DONTNEED);
+        mprotect(section_begin, section_end - section_begin, PROT_NONE);
+        VLOG(image) << "Released and protected dex cache array image section from "
+                    << reinterpret_cast<const void*>(section_begin) << "-"
+                    << reinterpret_cast<const void*>(section_end);
+      }
+    }
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -1677,7 +1771,7 @@
   void Visit(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
-    if (!done_ && class_table != nullptr && !class_table->Visit(visitor_)) {
+    if (!done_ && class_table != nullptr && !class_table->Visit(*visitor_)) {
       // If the visitor ClassTable returns false it means that we don't need to continue.
       done_ = true;
     }
@@ -1690,7 +1784,7 @@
 };
 
 void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
-  if (boot_class_table_.Visit(visitor)) {
+  if (boot_class_table_.Visit(*visitor)) {
     VisitClassLoaderClassesVisitor loader_visitor(visitor);
     VisitClassLoaders(&loader_visitor);
   }
@@ -1713,7 +1807,7 @@
 
 class GetClassesInToVector : public ClassVisitor {
  public:
-  bool Visit(mirror::Class* klass) OVERRIDE {
+  bool operator()(mirror::Class* klass) OVERRIDE {
     classes_.push_back(klass);
     return true;
   }
@@ -1725,7 +1819,7 @@
   explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
       : arr_(arr), index_(0) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     ++index_;
     if (index_ <= arr_->GetLength()) {
       arr_->Set(index_ - 1, klass);
@@ -1746,16 +1840,17 @@
 void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor) {
   // TODO: it may be possible to avoid secondary storage if we iterate over dex caches. The problem
   // is avoiding duplicates.
+  Thread* const self = Thread::Current();
   if (!kMovingClasses) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
     GetClassesInToVector accumulator;
     VisitClasses(&accumulator);
     for (mirror::Class* klass : accumulator.classes_) {
-      if (!visitor->Visit(klass)) {
+      if (!visitor->operator()(klass)) {
         return;
       }
     }
   } else {
-    Thread* const self = Thread::Current();
     StackHandleScope<1> hs(self);
     auto classes = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
     // We size the array assuming classes won't be added to the class table during the visit.
@@ -1783,7 +1878,7 @@
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
       mirror::Class* klass = classes->Get(i);
-      if (klass != nullptr && !visitor->Visit(klass)) {
+      if (klass != nullptr && !visitor->operator()(klass)) {
         return;
       }
     }
@@ -7154,7 +7249,7 @@
  public:
   explicit DumpClassVisitor(int flags) : flags_(flags) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     klass->DumpClass(LOG(ERROR), flags_);
     return true;
   }

diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 3b4e912..71fcf29 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h

@@ -60,6 +60,13 @@
 
 enum VisitRootFlags : uint8_t;
 
+class ClassVisitor {  // Abstract functor invoked with a class during a class visit.
+ public:
+  virtual ~ClassVisitor() {}
+  // Return true to continue visiting, false to stop.
+  virtual bool operator()(mirror::Class* klass) = 0;
+};
+
 class ClassLoaderVisitor {
  public:
   virtual ~ClassLoaderVisitor() {}
@@ -1008,11 +1015,13 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
-  void UpdateAppImageClassLoadersAndDexCaches(
+  bool UpdateAppImageClassLoadersAndDexCaches(
       gc::space::ImageSpace* space,
       Handle<mirror::ClassLoader> class_loader,
       Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-      bool added_class_table)
+      bool added_class_table,
+      bool* out_forward_dex_cache_array,
+      std::string* out_error_msg)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 

diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index b86da9f..3a0f3e5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc

@@ -210,11 +210,10 @@
                                                klass->GetDescriptor(&temp2)));
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
-      if (klass->NumDirectMethods() == 1) {
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsClassInitializer());
-        EXPECT_TRUE(klass->GetDirectMethod(0, sizeof(void*))->IsDirect());
-      } else {
-        EXPECT_EQ(0U, klass->NumDirectMethods());
+      // Check that all direct methods are static (either <clinit> or a regular static method).
+      for (ArtMethod& m : klass->GetDirectMethods(sizeof(void*))) {
+        EXPECT_TRUE(m.IsStatic());
+        EXPECT_TRUE(m.IsDirect());
       }
     } else {
       if (!klass->IsSynthetic()) {

diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index aef02b6..e512906 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h

@@ -28,6 +28,9 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
+  for (GcRoot<mirror::Object>& root : dex_files_) {
+    visitor.VisitRoot(root.AddressWithoutBarrier());
+  }
 }
 
 template<class Visitor>
@@ -42,6 +45,19 @@
   }
 }
 
+template <typename Visitor>
+bool ClassTable::Visit(Visitor& visitor) {
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor(root.Read())) {  // A false result from the visitor ends the visit.
+        return false;
+      }
+    }
+  }
+  return true;  // The visitor accepted every class in every class set.
+}
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_TABLE_INL_H_

diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 2a4f0e0..afb0556 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc

@@ -73,17 +73,6 @@
   return existing;
 }
 
-bool ClassTable::Visit(ClassVisitor* visitor) {
-  for (ClassSet& class_set : classes_) {
-    for (GcRoot<mirror::Class>& root : class_set) {
-      if (!visitor->Visit(root.Read())) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 size_t ClassTable::NumZygoteClasses() const {
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
@@ -183,12 +172,4 @@
   return read_count;
 }
 
-void ClassTable::SetClassLoader(mirror::ClassLoader* class_loader) {
-  for (const ClassSet& class_set : classes_) {
-    for (const GcRoot<mirror::Class>& root : class_set) {
-      root.Read()->SetClassLoader(class_loader);
-    }
-  }
-}
-
 }  // namespace art

diff --git a/runtime/class_table.h b/runtime/class_table.h
index 0b42035..5f2eb48 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h

@@ -36,13 +36,6 @@
   class ClassLoader;
 }  // namespace mirror
 
-class ClassVisitor {
- public:
-  virtual ~ClassVisitor() {}
-  // Return true to continue visiting.
-  virtual bool Visit(mirror::Class* klass) = 0;
-};
-
 // Each loader has a ClassTable
 class ClassTable {
  public:
@@ -80,8 +73,9 @@
       NO_THREAD_SAFETY_ANALYSIS
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
-  // Return false if the callback told us to exit.
-  bool Visit(ClassVisitor* visitor)
+  // Calls the visitor on each class; stops and returns false once the visitor returns false.
+  template <typename Visitor>
+  bool Visit(Visitor& visitor)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
@@ -118,11 +112,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Change the class loader of all the contained classes.
-  void SetClassLoader(mirror::ClassLoader* class_loader)
-    REQUIRES(Locks::classlinker_classes_lock_)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
  private:
   class ClassDescriptorHashEquals {
    public:

diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a0f875d..904490a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc

@@ -983,7 +983,7 @@
  public:
   explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
 
-  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!c->IsPrimitive()) {
       classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
     }

diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5345b89..5c5abeb 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc

@@ -349,7 +349,7 @@
   // Check that the class pointer inside the object is not null and is aligned.
   // TODO: Method might be not a heap address, and GetClass could fault.
   // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = method_obj->GetDeclaringClassNoBarrier();
+  mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;

diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 9397c35..8e1b7f4 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc

@@ -1081,7 +1081,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case

diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d76a8d1..8269f76 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc

@@ -271,7 +271,7 @@
     // The loaded spaces. Secondary images may fail to load, in which case we need to remove
     // already added spaces.
     std::vector<space::Space*> added_image_spaces;
-
+    uint8_t* const original_requested_alloc_space_begin = requested_alloc_space_begin;
     for (size_t index = 0; index < image_file_names.size(); ++index) {
       std::string& image_name = image_file_names[index];
       ATRACE_BEGIN("ImageSpace::Create");
@@ -317,7 +317,10 @@
         // Remove already loaded spaces.
         for (space::Space* loaded_space : added_image_spaces) {
           RemoveSpace(loaded_space);
+          delete loaded_space;
         }
+        boot_image_spaces_.clear();
+        requested_alloc_space_begin = original_requested_alloc_space_begin;
         break;
       }
     }

diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 8356814..e172f85 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc

@@ -93,7 +93,7 @@
       // in the heap causing corruption since this field would get swept.
       if (collector_->IsMarkedHeapReference(referent_addr)) {
         if (!preserving_references_ ||
-           (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) {
+           (LIKELY(!reference->IsFinalizerReferenceInstance()) && reference->IsUnprocessed())) {
           return referent_addr->AsMirrorPtr();
         }
       }
@@ -275,7 +275,7 @@
   // GC queues, but since we hold the lock finalizer_reference_queue_ lock it also prevents this
   // race.
   MutexLock mu2(self, *Locks::reference_queue_finalizer_references_lock_);
-  if (!reference->IsEnqueued()) {
+  if (reference->IsUnprocessed()) {
     CHECK(reference->IsFinalizerReferenceInstance());
     reference->SetPendingNext(reference);
     return true;

diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 67dcc2d..03ab9a1 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc

@@ -32,42 +32,37 @@
 void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
   DCHECK(ref != nullptr);
   MutexLock mu(self, *lock_);
-  if (!ref->IsEnqueued()) {
-    EnqueuePendingReference(ref);
+  if (ref->IsUnprocessed()) {
+    EnqueueReference(ref);
   }
 }
 
 void ReferenceQueue::EnqueueReference(mirror::Reference* ref) {
-  CHECK(ref->IsEnqueuable());
-  EnqueuePendingReference(ref);
-}
-
-void ReferenceQueue::EnqueuePendingReference(mirror::Reference* ref) {
   DCHECK(ref != nullptr);
+  CHECK(ref->IsUnprocessed());
   if (IsEmpty()) {
     // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
     mirror::Reference* head = list_->GetPendingNext();
+    DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
+  // Add the reference in the middle to preserve the cycle.
   list_->SetPendingNext(ref);
 }
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* head = list_->GetPendingNext();
-  DCHECK(head != nullptr);
-  mirror::Reference* ref;
+  mirror::Reference* ref = list_->GetPendingNext();
+  DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
-  if (list_ == head) {
-    ref = list_;
+  if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = head->GetPendingNext();
+    mirror::Reference* next = ref->GetPendingNext();
     list_->SetPendingNext(next);
-    ref = head;
   }
   ref->SetPendingNext(nullptr);
   Heap* heap = Runtime::Current()->GetHeap();
@@ -152,9 +147,7 @@
       } else {
         ref->ClearReferent<false>();
       }
-      if (ref->IsEnqueuable()) {
-        cleared_references->EnqueuePendingReference(ref);
-      }
+      cleared_references->EnqueueReference(ref);
     }
   }
 }
@@ -167,8 +160,6 @@
     if (referent_addr->AsMirrorPtr() != nullptr &&
         !collector->IsMarkedHeapReference(referent_addr)) {
       mirror::Object* forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
-      // If the referent is non-null the reference must queuable.
-      DCHECK(ref->IsEnqueuable());
       // Move the updated referent to the zombie field.
       if (Runtime::Current()->IsActiveTransaction()) {
         ref->SetZombie<true>(forward_address);

diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index aabac97..04d3454 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h

@@ -44,27 +44,24 @@
 class Heap;
 
 // Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
-// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
-// java.lang.ref.Reference objects.
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained as an unordered,
+// circular, and singly-linked list using the pendingNext fields of the java.lang.ref.Reference
+// objects.
 class ReferenceQueue {
  public:
   explicit ReferenceQueue(Mutex* lock);
 
-  // Enqueue a reference if is not already enqueued. Thread safe to call from multiple threads
-  // since it uses a lock to avoid a race between checking for the references presence and adding
-  // it.
+  // Enqueue a reference if it is unprocessed. Thread safe to call from multiple threads
+  // since it uses a lock to avoid a race between checking for the reference's presence
+  // and adding it.
   void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*lock_);
 
-  // Enqueue a reference, unlike EnqueuePendingReference, enqueue reference checks that the
-  // reference IsEnqueueable. Not thread safe, used when mutators are paused to minimize lock
-  // overhead.
+  // Enqueue a reference. The reference must be unprocessed.
+  // Not thread safe, used when mutators are paused to minimize lock overhead.
   void EnqueueReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Enqueue a reference without checking that it is enqueable.
-  void EnqueuePendingReference(mirror::Reference* ref) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Dequeue the first reference (returns list_).
+  // Dequeue a reference from the queue and return that dequeued reference.
   mirror::Reference* DequeuePendingReference() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to

diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index dc23afe..35bf718 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc

@@ -41,19 +41,22 @@
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  // FIFO ordering.
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 2U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref2.Get());
+
+  std::set<mirror::Reference*> refs = {ref1.Get(), ref2.Get()};
+  std::set<mirror::Reference*> dequeued;
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  ASSERT_EQ(queue.DequeuePendingReference(), ref1.Get());
+  dequeued.insert(queue.DequeuePendingReference());
   ASSERT_EQ(queue.GetLength(), 0U);
   ASSERT_TRUE(queue.IsEmpty());
+  ASSERT_EQ(refs, dequeued);
 }
 
 TEST_F(ReferenceQueueTest, Dump) {
@@ -75,9 +78,9 @@
   ASSERT_TRUE(ref1.Get() != nullptr);
   auto ref2(hs.NewHandle(finalizer_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref2.Get() != nullptr);
-  queue.EnqueuePendingReference(ref1.Get());
+  queue.EnqueueReference(ref1.Get());
   queue.Dump(LOG(INFO));
-  queue.EnqueuePendingReference(ref2.Get());
+  queue.EnqueueReference(ref2.Get());
   queue.Dump(LOG(INFO));
 }
 

diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 998db52..9269339 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc

@@ -867,20 +867,20 @@
     if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
       mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
-      klass->FixupNativePointers(klass, sizeof(void*), visitor);
+      klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, sizeof(void*), visitor);
       // Deal with the arrays.
       mirror::PointerArray* vtable = klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
       if (vtable != nullptr) {
-        vtable->Fixup(vtable, sizeof(void*), visitor);
+        vtable->Fixup<kVerifyNone, kWithoutReadBarrier>(vtable, sizeof(void*), visitor);
       }
       mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
       if (iftable != nullptr) {
-        for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-          if (iftable->GetMethodArrayCount(i) > 0) {
+        for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
+          if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
             DCHECK(methods != nullptr);
-            methods->Fixup(methods, sizeof(void*), visitor);
+            methods->Fixup<kVerifyNone, kWithoutReadBarrier>(methods, sizeof(void*), visitor);
           }
         }
       }
@@ -925,7 +925,7 @@
     if (fixup_heap_objects_) {
       method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this));
     }
-    method->UpdateEntrypoints(ForwardCodeAdapter(this));
+    method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this));
   }
 
  private:
@@ -1014,6 +1014,7 @@
     // Nothing to fix up.
     return true;
   }
+  ScopedDebugDisallowReadBarriers sddrb(Thread::Current());
   // Need to update the image to be at the target base.
   const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
   uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
@@ -1039,7 +1040,7 @@
     CHECK_EQ(image_header.GetImageBegin(), target_base);
     // Fix up dex cache DexFile pointers.
     auto* dex_caches = image_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches)->
-        AsObjectArray<mirror::DexCache>();
+        AsObjectArray<mirror::DexCache, kVerifyNone, kWithoutReadBarrier>();
     for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
       mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i);
       // Fix up dex cache pointers.
@@ -1047,7 +1048,7 @@
       if (strings != nullptr) {
         GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings);
         if (strings != new_strings) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::StringsOffset(), new_strings);
+          dex_cache->SetStrings(new_strings);
         }
         dex_cache->FixupStrings<kWithoutReadBarrier>(new_strings, fixup_adapter);
       }
@@ -1055,7 +1056,7 @@
       if (types != nullptr) {
         GcRoot<mirror::Class>* new_types = fixup_adapter.ForwardObject(types);
         if (types != new_types) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedTypesOffset(), new_types);
+          dex_cache->SetResolvedTypes(new_types);
         }
         dex_cache->FixupResolvedTypes<kWithoutReadBarrier>(new_types, fixup_adapter);
       }
@@ -1063,7 +1064,7 @@
       if (methods != nullptr) {
         ArtMethod** new_methods = fixup_adapter.ForwardObject(methods);
         if (methods != new_methods) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedMethodsOffset(), new_methods);
+          dex_cache->SetResolvedMethods(new_methods);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedMethods(); j != num; ++j) {
           ArtMethod* orig = mirror::DexCache::GetElementPtrSize(new_methods, j, sizeof(void*));
@@ -1077,7 +1078,7 @@
       if (fields != nullptr) {
         ArtField** new_fields = fixup_adapter.ForwardObject(fields);
         if (fields != new_fields) {
-          dex_cache->SetFieldPtr64<false>(mirror::DexCache::ResolvedFieldsOffset(), new_fields);
+          dex_cache->SetResolvedFields(new_fields);
         }
         for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
           ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, sizeof(void*));

diff --git a/runtime/image.cc b/runtime/image.cc
index de00343..1f54e3e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc

@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '6', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '7', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,

diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index c57b1bb..7484635 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc

@@ -55,7 +55,7 @@
   explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool Visit(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
     instrumentation_->InstallStubsForClass(klass);
     return true;  // we visit all classes.
   }

diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 56aeefc..e3cbf53 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h

@@ -290,6 +290,14 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
+  }
+
+  // Any instrumentation *other* than what is needed for Jit profiling active?
+  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 

diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 2559222..01498a2 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc

@@ -240,7 +240,7 @@
 }
 
 #if !defined(__clang__)
-#if (defined(__arm__) || defined(__i386__))
+#if (defined(__arm__) || defined(__i386__) || defined(__aarch64__))
 // TODO: remove when all targets implemented.
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
@@ -248,7 +248,7 @@
 #endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-#if (defined(__arm__) || defined(__i386__))
+#if (defined(__arm__) || defined(__i386__) || defined(__aarch64__))
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 #else
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
@@ -322,8 +322,14 @@
         const instrumentation::Instrumentation* const instrumentation =
             Runtime::Current()->GetInstrumentation();
         while (true) {
-          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
-            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
+          // Mterp does not support all instrumentation.
+          bool unhandled_instrumentation;
+          if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm)) {
+            unhandled_instrumentation = instrumentation->NonJitProfilingActive();
+          } else {
+            unhandled_instrumentation = instrumentation->IsActive();
+          }
+          if (unhandled_instrumentation || !Runtime::Current()->IsStarted()) {
 #if !defined(__clang__)
             return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
 #else

diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 474bc3c..dae3b57 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S

@@ -25,10 +25,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    mov${revcmp} rINST, #2              @ rINST<- BYTE branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST

diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 1dba856..a444843 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S

@@ -124,6 +124,18 @@
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /*
+ * On-stack replacement pending.
+ * Branch offset in rINST on entry.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF
+    add r1, rFP, #OFF_FP_SHADOWFRAME
+    mov r2, rINST
+    bl MterpLogOSR
+#endif
+    b MterpFallback                     @ Let the reference interpreter deal with it.
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:

diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 14319d9..b67bd22 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S

@@ -85,6 +85,8 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4

diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 9b3632a..0f02438 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S

@@ -16,10 +16,18 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags (cmp above clobbers movs flags)
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue

diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 2231acd..8a9acf0 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S

@@ -13,9 +13,17 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST

diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index 6b72ff5..51a6f06 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S

@@ -22,9 +22,17 @@
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST

diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 1e3370e..109b245 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S

@@ -30,8 +30,17 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
+    mov     rINST, r0
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST

diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 6e9ef55..b2cc18b 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S

@@ -20,11 +20,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    mov${revcmp} rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ load flags after the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST

diff --git a/runtime/interpreter/mterp/arm64/alt_stub.S b/runtime/interpreter/mterp/arm64/alt_stub.S
new file mode 100644
index 0000000..9b8b16d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/alt_stub.S

@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (${opnum} * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.

diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
new file mode 100644
index 0000000..53d4dfa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/bincmp.S

@@ -0,0 +1,44 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform, e.g. for "if-le"
+     * you would use "le" (csel picks the branch offset when it holds).
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/arm64/binop.S b/runtime/interpreter/mterp/arm64/binop.S
new file mode 100644
index 0000000..b629b0b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop.S

@@ -0,0 +1,33 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */

diff --git a/runtime/interpreter/mterp/arm64/binop2addr.S b/runtime/interpreter/mterp/arm64/binop2addr.S
new file mode 100644
index 0000000..a480a7d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binop2addr.S

@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation (vA <- vA op vB).  Provide an
+     * "instr" line that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (kept live until SET_VREG)
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // bail out if second operand (vB) is zero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $result, w9                // vA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */

diff --git a/runtime/interpreter/mterp/arm64/binopLit16.S b/runtime/interpreter/mterp/arm64/binopLit16.S
new file mode 100644
index 0000000..4f9d205
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit16.S

@@ -0,0 +1,28 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation (vA <- vB op #+CCCC).  Provide an
+     * "instr" line that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (kept live until SET_VREG)
+    GET_VREG w0, w2                     // w0<- vB
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // bail out if the literal is zero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $result, w9                // vA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */

diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
new file mode 100644
index 0000000..326c657
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S

@@ -0,0 +1,30 @@
+%default {"preinstr":"", "result":"w0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation (vAA <- vBB op #+CC).  Provide an
+     * "instr" line that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (kept live until SET_VREG)
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if $chkzero
+    cbz     w1, common_errDivideByZero  // bail out if the literal is zero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    $preinstr                           // optional op; may set condition codes
+    $instr                              // $result<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $result, w9                // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */

diff --git a/runtime/interpreter/mterp/arm64/binopWide.S b/runtime/interpreter/mterp/arm64/binopWide.S
new file mode 100644
index 0000000..9de24f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide.S

@@ -0,0 +1,30 @@
+%default {"preinstr":"", "instr":"add x0, x1, x2", "result":"x0", "r1":"x1", "r2":"x2", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation (vAA <- vBB op vCC).  Provide an "instr"
+     * line that specifies an instruction that performs "result = r1 op r2"
+     * (x1 and x2 by default).  This could be an ARM instruction or a function call.
+     * (If the result comes back in a register other than x0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC ($r2).  Useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB (second code unit)
+    lsr     w4, wINST, #8               // w4<- AA (kept live until SET_VREG_WIDE)
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE $r2, w2               // $r2<- vCC (64 bits; macro uses ip2)
+    GET_VREG_WIDE $r1, w1               // $r1<- vBB (64 bits; macro uses ip2)
+    .if $chkzero
+    cbz     $r2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    $preinstr                           // optional op
+    $instr                              // $result<- op, w0-w4 changed
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $result, w4           // vAA<- $result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */

diff --git a/runtime/interpreter/mterp/arm64/binopWide2addr.S b/runtime/interpreter/mterp/arm64/binopWide2addr.S
new file mode 100644
index 0000000..d9927a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/binopWide2addr.S

@@ -0,0 +1,29 @@
+%default {"preinstr":"", "instr":"add x0, x0, x1", "r0":"x0", "r1":"x1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation (vA <- vA op vB).  Provide an
+     * "instr" line that specifies an instruction that performs "r0 = r0 op r1"
+     * (x0 and x1 by default).  This must not be a function call, as we keep
+     * w2 (the index A) live across it.
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB ($r1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A (kept live until SET_VREG_WIDE)
+    GET_VREG_WIDE $r1, w1               // $r1<- vB (64 bits)
+    GET_VREG_WIDE $r0, w2               // $r0<- vA (64 bits)
+    .if $chkzero
+    cbz     $r1, common_errDivideByZero  // bail out if second operand (vB) is zero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    $preinstr                           // optional op
+    $instr                              // $r0<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $r0, w2               // vA<- $r0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */

diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
new file mode 100644
index 0000000..f9073ab
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/entry.S

@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ * Exits through MterpDone with x0 = 0 (retry with reference interpreter) or 1 (return to caller).
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xIBASE, xREFS, [sp, #-64]!             // allocate 64-byte frame, save x24/x25
+    stp     xSELF, xINST, [sp, #16]                // save x22/x23
+    stp     xPC, xFP, [sp, #32]                    // save x20/x21
+    stp     fp, lr, [sp, #48]                      // save frame pointer and return address
+    add     fp, sp, #48                            // fp<- address of the saved fp/lr pair
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]  // w0<- number_of_vregs
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to the base of the vreg array in the shadow frame
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame (vregs + 4 * number_of_vregs)
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode (dex_pc counts 16-bit units)
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at xPC */
+    FETCH_INST                          // load wINST from xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */

diff --git a/runtime/interpreter/mterp/arm64/fallback.S b/runtime/interpreter/mterp/arm64/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fallback.S

@@ -0,0 +1,3 @@
+/* Transfer stub: branch to MterpFallback, which bails out to the reference interpreter */
+    b    MterpFallback
+

diff --git a/runtime/interpreter/mterp/arm64/fbinop.S b/runtime/interpreter/mterp/arm64/fbinop.S
new file mode 100644
index 0000000..926d078
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop.S

@@ -0,0 +1,19 @@
+%default {}
+    /*
+     * Generic 32-bit floating-point operation (vAA <- vBB op vCC).
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    $instr                              // s0<- op
+    lsr     w1, wINST, #8               // w1<- AA (read before wINST is reloaded)
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
new file mode 100644
index 0000000..0d57cbf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S

@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation (vA <- vA op vB).
+     * Provide an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    $instr                              // s2<- op
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s2, w9                     // vA<- s2
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
new file mode 100644
index 0000000..a45e789
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/fcmp.S

@@ -0,0 +1,20 @@
+%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"}
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register: 0 if equal, else default_val if "cond" holds, else -default_val.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG$wide $r1, w2               // $r1<- vBB
+    GET_VREG$wide $r2, w3               // $r2<- vCC
+    mov     w0, #$default_val           // w0<- result used when the condition holds
+    fcmp $r1, $r2                       // set flags from vBB compared with vCC
+    csneg w0, w0, w0, $cond             // keep w0 if the condition holds, else negate it
+    csel w0, wzr, w0, eq                // w0<- 0 if the operands compared equal
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
new file mode 100644
index 0000000..d237c51
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/footer.S

@@ -0,0 +1,183 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+common_errArrayIndex:                   // bail-out path for a pending array index error
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+common_errNegativeArraySize:            // bail-out path for a pending negative array size error
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+common_errNoSuchMethod:                 // bail-out path for a pending no-such-method error
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+common_errNullObject:                   // bail-out path for a pending null object error
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+common_exceptionThrown:                 // bail-out path used once an exception has been thrown
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+MterpSuspendFallback:                   // bail-out path taken for a suspend request
+    EXPORT_PC                           // publish xPC to the shadow frame before bailing out
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]  // arg2: thread flags (64-bit load)
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback               // return 0: retry with reference interpreter
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]   // x0<- value at the thread exception offset
+    cbz     x0, MterpFallback                       // If zero (nothing pending), fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ * MterpHandleException returns zero when there is no local catch block.
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]            // x0<- code_item
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]               // w1<- dex_pc from the shadow frame (32-bit load zeroes upper x1)
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET         // xPC<- base of insns[]
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]          // export it (same store as EXPORT_PC)
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in w7.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1                      // a suspend or checkpoint request is pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement pending.
+ * Branch offset in wINST on entry.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    sbfm x2, xINST, 0, 31               // arg2: wINST sign-extended to 64 bits (the branch offset)
+    bl MterpLogOSR
+#endif
+    b MterpFallback                     // Let the reference interpreter deal with it.
+
+/*
+ * Bail out to reference interpreter.  Falls through into MterpCommonFallback.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // arg0: self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow frame (xFP minus the vregs offset)
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * Common exit paths.  ExecuteMterpImpl pushed a 64-byte frame holding
+ * xIBASE/xREFS, xSELF/xINST, xPC/xFP and fp/lr.  MterpDone reloads those
+ * registers, pops the frame and returns with the status in x0.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]      // x2<- result_register pointer saved at entry
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]       // lr is scratch here; it is reloaded in MterpDone
+    str     x0, [x2]                                // store x0 through the result_register pointer
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2                                  // no suspend or checkpoint request pending
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+    ldp     fp, lr, [sp, #48]                       // restore frame pointer and return address
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64                // restore and pop the 64-byte frame
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+

diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
new file mode 100644
index 0000000..9f5ad1e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S

@@ -0,0 +1,18 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     * NOTE(review): the default tgtreg is a 64-bit register; callers presumably override it - confirm.
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG $srcreg, w3                // $srcreg<- vB
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // $tgtreg<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- $tgtreg
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
new file mode 100644
index 0000000..411396b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S

@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: presumably long-to-float, double-to-int, double-to-float (TODO confirm against the op_*.S users)
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE $srcreg, w3           // $srcreg<- vB (wide load)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // $tgtreg<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG $tgtreg, w4                // vA<- $tgtreg
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
new file mode 100644
index 0000000..d83b39c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWide.S

@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE $srcreg, w3           // $srcreg<- vB (wide load)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // $tgtreg<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- $tgtreg (wide store)
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
new file mode 100644
index 0000000..50a73f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/funopWider.S

@@ -0,0 +1,17 @@
+%default {"srcreg":"s0", "tgtreg":"d0"}
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG $srcreg, w3                // $srcreg<- vB
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    $instr                              // $tgtreg<- op
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE $tgtreg, w4           // vA<- $tgtreg (wide store)
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
new file mode 100644
index 0000000..622abc8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/header.S

@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke, allows higher-level code to create frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to the dex_pc_ptr field in the shadow frame, for the benefit of future
+ * exception objects.  Must be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code item's insns[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+/*
+ * Fetch the 16-bit code unit at xPC into wINST (zero-extended).  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).  The load uses pre-indexed addressing, so xPC is written back.
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and wINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * wINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later (for example with ADVANCE).
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]
+.endm
+
+/* Advance xPC by some number of 16-bit code units (count * 2 bytes).  Does not reload wINST. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Advance xPC by the offset held in "reg", then fetch the instruction it now
+ * points to into wINST.  "reg" is a 32-bit register that must specify the distance
+ * in bytes, *not* 16-bit code units; it is sign-extended, so it may be negative.
+ * Must not set flags.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * FETCH zero-extends the value; the "_S" variant works the same but sign-extends it.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte (zero-extended) from an offset past the current PC.  Pass in the
+ * same "count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field (low 8 bits of xINST) into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field (low 8 bits of ireg) into oreg.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in reg: branch to xIBASE + (reg << handler_size_bits).
+ * Clobbers reg.  GOTO_OPCODE_BASE is the same but takes the table base as an argument.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #${handler_size_bits}
+    br      \reg
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.  "vreg" is a 32-bit register holding
+ * the vreg index.  Setters also update the parallel slot in the reference array (xREFS).
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]     // non-reference store: zero the reference slot
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]    // mirror the value into the reference slot; tmpreg is unused
+.endm
+
+/*
+ * Get/set the 64-bit value from a Dalvik register pair.  Both macros clobber ip2.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2          // ip2<- address of the vreg slot
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2          // ip2<- address of the vreg slot
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2        // ip2<- address of the matching reference slot
+    str     xzr, [ip2]                       // zero 64 bits, i.e. both 32-bit reference slots
+.endm
+
+/*
+ * Convert a virtual register index into an address (xFP + index * 4).
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table: reload xIBASE from the current-ibase field of the Thread (xSELF).
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm

diff --git a/runtime/interpreter/mterp/arm64/invoke.S b/runtime/interpreter/mterp/arm64/invoke.S
new file mode 100644
index 0000000..ff1974c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/invoke.S

@@ -0,0 +1,19 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper: passes four arguments in x0-x3 to the helper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- xFP + OFF_FP_SHADOWFRAME (shadow frame)
+    mov     x2, xPC                     // x2<- xPC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- xINST (passed unmasked)
+    bl      $helper                     // (x0, x1, x2, x3) as set up above
+    cbz     w0, MterpException          // zero result: branch to MterpException
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+

diff --git a/runtime/interpreter/mterp/arm64/op_add_double.S b/runtime/interpreter/mterp/arm64/op_add_double.S
new file mode 100644
index 0000000..8509f70
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fadd d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_double_2addr.S b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S
new file mode 100644
index 0000000..61fd58f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_double_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fadd     d0, d0, d1", "r0":"d0", "r1":"d1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_float.S b/runtime/interpreter/mterp/arm64/op_add_float.S
new file mode 100644
index 0000000..7d09fef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float.S

@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fadd   s0, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_float_2addr.S b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S
new file mode 100644
index 0000000..7b378e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_float_2addr.S

@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fadd   s2, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_int.S b/runtime/interpreter/mterp/arm64/op_add_int.S
new file mode 100644
index 0000000..6eadb54
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"add     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_int_2addr.S b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S
new file mode 100644
index 0000000..d35bc8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"add     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit16.S b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S
new file mode 100644
index 0000000..4930ad7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"add     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_int_lit8.S b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S
new file mode 100644
index 0000000..196ea99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"add     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_long.S b/runtime/interpreter/mterp/arm64/op_add_long.S
new file mode 100644
index 0000000..bc334aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"add x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_add_long_2addr.S b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S
new file mode 100644
index 0000000..5e5dbce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_add_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"add     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_aget.S b/runtime/interpreter/mterp/arm64/op_aget.S
new file mode 100644
index 0000000..662c9cc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget.S

@@ -0,0 +1,28 @@
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: the load instruction, index shift and data offset are template
+     * parameters supplied by each including opcode file.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #$shift    // x0<- arrayObj + index*width (only dereferenced after the bounds check)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    $load   w2, [x0, #$data_offset]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_aget_boolean.S b/runtime/interpreter/mterp/arm64/op_aget_boolean.S
new file mode 100644
index 0000000..6ab6cc1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aget_byte.S b/runtime/interpreter/mterp/arm64/op_aget_byte.S
new file mode 100644
index 0000000..c7f5b23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aget_char.S b/runtime/interpreter/mterp/arm64/op_aget_char.S
new file mode 100644
index 0000000..9fddf17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_char.S

@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aget_object.S b/runtime/interpreter/mterp/arm64/op_aget_object.S
new file mode 100644
index 0000000..1bbe3e8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_object.S

@@ -0,0 +1,20 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]    // x1<- thread exception field (64-bit load)
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     x1, MterpException          // test the whole 64-bit value that was loaded, not only w1
+    SET_VREG_OBJECT w0, w2               // vAA<- w0 (helper result)
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_aget_short.S b/runtime/interpreter/mterp/arm64/op_aget_short.S
new file mode 100644
index 0000000..39554de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_short.S

@@ -0,0 +1 @@
+%include "arm64/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aget_wide.S b/runtime/interpreter/mterp/arm64/op_aget_wide.S
new file mode 100644
index 0000000..6f990ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aget_wide.S

@@ -0,0 +1,21 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject        // bail if null array object
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #3         // x0<- arrayObj + index*8 (index zero-extended; used only after the bounds check)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4                // vAA<- x2
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_and_int.S b/runtime/interpreter/mterp/arm64/op_and_int.S
new file mode 100644
index 0000000..31f3f73
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"and     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_and_int_2addr.S b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S
new file mode 100644
index 0000000..e59632c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"and     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit16.S b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S
new file mode 100644
index 0000000..6540f81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"and     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_and_int_lit8.S b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S
new file mode 100644
index 0000000..167b40e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"and     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_and_long.S b/runtime/interpreter/mterp/arm64/op_and_long.S
new file mode 100644
index 0000000..ede047d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"and x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_and_long_2addr.S b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S
new file mode 100644
index 0000000..d62ccef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_and_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"and     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_aput.S b/runtime/interpreter/mterp/arm64/op_aput.S
new file mode 100644
index 0000000..175b483
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput.S

@@ -0,0 +1,28 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: the store instruction, index shift and data offset are template
+     * parameters supplied by each including opcode file.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #$shift    // x0<- arrayObj + index*width (index zero-extended, as in op_aget)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    $store  w2, [x0, #$data_offset]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_aput_boolean.S b/runtime/interpreter/mterp/arm64/op_aput_boolean.S
new file mode 100644
index 0000000..5e7a86f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aput_byte.S b/runtime/interpreter/mterp/arm64/op_aput_byte.S
new file mode 100644
index 0000000..d659ebc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aput_char.S b/runtime/interpreter/mterp/arm64/op_aput_char.S
new file mode 100644
index 0000000..7547c80
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_char.S

@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aput_object.S b/runtime/interpreter/mterp/arm64/op_aput_object.S
new file mode 100644
index 0000000..0146fdc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_object.S

@@ -0,0 +1,13 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME    // x0<- xFP + OFF_FP_SHADOWFRAME (shadow frame)
+    mov     x1, xPC                         // x1<- xPC
+    mov     w2, wINST                       // w2<- wINST
+    bl      MterpAputObject                 // (x0, x1, w2) as set up above
+    cbz     w0, MterpPossibleException      // zero result: branch to MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_aput_short.S b/runtime/interpreter/mterp/arm64/op_aput_short.S
new file mode 100644
index 0000000..8631e28
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_short.S

@@ -0,0 +1 @@
+%include "arm64/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }

diff --git a/runtime/interpreter/mterp/arm64/op_aput_wide.S b/runtime/interpreter/mterp/arm64/op_aput_wide.S
new file mode 100644
index 0000000..e1cf9c1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_aput_wide.S

@@ -0,0 +1,21 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #3         // x0<- arrayObj + index*8 (index zero-extended; used only after the bounds check)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    GET_VREG_WIDE x1, w4                // x1<- vAA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // vBB[vCC]<- x1
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_array_length.S b/runtime/interpreter/mterp/arm64/op_array_length.S
new file mode 100644
index 0000000..0cce917
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_array_length.S

@@ -0,0 +1,12 @@
+    /*
+     * Return the length of an array.  vA <- length of the array referenced by vB.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // bail if null array object
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w3, w2                     // vA<- length
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_check_cast.S b/runtime/interpreter/mterp/arm64/op_check_cast.S
new file mode 100644
index 0000000..cb9f606
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_check_cast.S

@@ -0,0 +1,16 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // x1<- &vAA (address of the object reference)
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // x2<- method
+    mov      x3, xSELF                  // x3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz     w0, MterpPossibleException // nonzero result: branch to MterpPossibleException
+    ADVANCE  2                          // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
new file mode 100644
index 0000000..982e5b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S

@@ -0,0 +1,13 @@
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2                // x1<- vBB
+    GET_VREG_WIDE x2, w3                // x2<- vCC
+    cmp     x1, x2                      // signed 64-bit compare; flags are reused by both selects below
+    csinc   w0, wzr, wzr, eq            // w0<- (vBB == vCC) ? 0 : 1
+    csneg   w0, w0, w0, ge              // w0<- (vBB >= vCC) ? w0 : -w0, giving -1 / 0 / 1
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
new file mode 100644
index 0000000..14f9ff8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S

@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"}

diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
new file mode 100644
index 0000000..3a20cba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S

@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"}

diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
new file mode 100644
index 0000000..06d5917
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S

@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"}

diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
new file mode 100644
index 0000000..d87d086
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S

@@ -0,0 +1 @@
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"}

diff --git a/runtime/interpreter/mterp/arm64/op_const.S b/runtime/interpreter/mterp/arm64/op_const.S
new file mode 100644
index 0000000..031ede1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const.S

@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
new file mode 100644
index 0000000..27f5273
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S

@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
new file mode 100644
index 0000000..04cd4f8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S

@@ -0,0 +1,8 @@
+    /* const/4 vA, #+B */
+    lsl     w1, wINST, #16              // w1<- Bxxx0000 (B moved to the top nibble)
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  // ip<- opcode from xINST
+    SET_VREG w1, w0                     // vA<- w1
+    GOTO_OPCODE ip                      // execute next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_class.S b/runtime/interpreter/mterp/arm64/op_const_class.S
new file mode 100644
index 0000000..971cfa0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_class.S

@@ -0,0 +1,12 @@
+    /* const/class vAA, Class//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME    // x2<- xFP + OFF_FP_SHADOWFRAME (shadow frame)
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz    w0, MterpPossibleException  // nonzero result: branch to MterpPossibleException
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
new file mode 100644
index 0000000..dd51ce1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S

@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH   w0, 1                       // w0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    lsl     w0, w0, #16                 // w0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_string.S b/runtime/interpreter/mterp/arm64/op_const_string.S
new file mode 100644
index 0000000..896f1e7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string.S

@@ -0,0 +1,12 @@
+    /* const/string vAA, String//BBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME    // x2<- xFP + OFF_FP_SHADOWFRAME (shadow frame)
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S
new file mode 100644
index 0000000..e1a7339
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_string_jumbo.S

@@ -0,0 +1,14 @@
+    /* const-string/jumbo vAA, String//BBBBBBBB */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME    // x2<- xFP + OFF_FP_SHADOWFRAME (w2 is free again here)
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // load wINST
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_wide.S b/runtime/interpreter/mterp/arm64/op_const_wide.S
new file mode 100644
index 0000000..8f57dda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide.S

@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 5                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // x0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // x0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
new file mode 100644
index 0000000..e43628b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S

@@ -0,0 +1,8 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    sbfm    x0, x0, 0, 31               // x0<- w0 sign-extended to 64 bits
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
new file mode 100644
index 0000000..527f7d8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S

@@ -0,0 +1,10 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    sbfm    x0, x0, 0, 31               // x0<- w0 sign-extended to 64 bits
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_high16.S b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S
new file mode 100644
index 0000000..94ab987
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_high16.S

@@ -0,0 +1,8 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    lsl     x0, x0, #48                 // x0<- BBBB000000000000
+    SET_VREG_WIDE x0, w1                // vAA<- x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_div_double.S b/runtime/interpreter/mterp/arm64/op_div_double.S
new file mode 100644
index 0000000..1f7dad0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fdiv d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_double_2addr.S b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S
new file mode 100644
index 0000000..414a175
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_double_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fdiv     d0, d0, d1", "r0":"d0", "r1":"d1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_float.S b/runtime/interpreter/mterp/arm64/op_div_float.S
new file mode 100644
index 0000000..f24a26c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float.S

@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fdiv   s0, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_float_2addr.S b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S
new file mode 100644
index 0000000..2888049
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_float_2addr.S

@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fdiv   s2, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_int.S b/runtime/interpreter/mterp/arm64/op_div_int.S
new file mode 100644
index 0000000..88371c0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_int_2addr.S b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S
new file mode 100644
index 0000000..5f5a80f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit16.S b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S
new file mode 100644
index 0000000..dc7a484
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"sdiv w0, w0, w1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_int_lit8.S b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S
new file mode 100644
index 0000000..c06521c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sdiv     w0, w0, w1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_long.S b/runtime/interpreter/mterp/arm64/op_div_long.S
new file mode 100644
index 0000000..820ae3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sdiv x0, x1, x2", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_div_long_2addr.S b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S
new file mode 100644
index 0000000..da7eabd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_div_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sdiv     x0, x0, x1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_double_to_float.S b/runtime/interpreter/mterp/arm64/op_double_to_float.S
new file mode 100644
index 0000000..c1555fd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_float.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvt s0, d0", "srcreg":"d0", "tgtreg":"s0"}

diff --git a/runtime/interpreter/mterp/arm64/op_double_to_int.S b/runtime/interpreter/mterp/arm64/op_double_to_int.S
new file mode 100644
index 0000000..7244bac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_int.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"fcvtzs w0, d0", "srcreg":"d0", "tgtreg":"w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_double_to_long.S b/runtime/interpreter/mterp/arm64/op_double_to_long.S
new file mode 100644
index 0000000..741160b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_double_to_long.S

@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"fcvtzs x0, d0", "srcreg":"d0", "tgtreg":"x0"}

diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
new file mode 100644
index 0000000..f50d9e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S

@@ -0,0 +1,13 @@
+    /* fill-array-data vAA, +BBBBBBBB (BBBBBBBB is a signed 32-bit code-unit offset) */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, w1, sxtw #1        // x1<- PC + sign-extended BBBBbbbb*2 (array data addr)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // helper returned 0: possible exception
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array.S b/runtime/interpreter/mterp/arm64/op_filled_new_array.S
new file mode 100644
index 0000000..806a1b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array.S

@@ -0,0 +1,18 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME   // x0<- xFP + OFF_FP_SHADOWFRAME (first helper argument)
+    mov     x1, xPC                        // x1<- current PC (second helper argument)
+    mov     x2, xSELF                      // x2<- self (third helper argument)
+    bl      $helper
+    cbz     w0, MterpPossibleException     // helper returned 0: possible exception
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S
new file mode 100644
index 0000000..3c9a419
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_filled_new_array_range.S

@@ -0,0 +1 @@
+%include "arm64/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_float_to_double.S b/runtime/interpreter/mterp/arm64/op_float_to_double.S
new file mode 100644
index 0000000..892feca
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_double.S

@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvt  d0, s0", "srcreg":"s0", "tgtreg":"d0"}

diff --git a/runtime/interpreter/mterp/arm64/op_float_to_int.S b/runtime/interpreter/mterp/arm64/op_float_to_int.S
new file mode 100644
index 0000000..c849d81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_int.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"fcvtzs w0, s0", "srcreg":"s0", "tgtreg":"w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_float_to_long.S b/runtime/interpreter/mterp/arm64/op_float_to_long.S
new file mode 100644
index 0000000..c3de16f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_float_to_long.S

@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"fcvtzs x0, s0", "srcreg":"s0", "tgtreg":"x0"}

diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
new file mode 100644
index 0000000..803bade
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto.S

@@ -0,0 +1,35 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* NOTE(review): the MTERP_SUSPEND path uses 32-bit ARM forms (movs, ldrmi, rIBASE); confirm it is never enabled for arm64 */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
+    add     w2, w1, w1                  // w2<- byte offset (plain add: flags are those left by movs)
+       // If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset, sign-extended from 32 to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+       // Negative (backward) offset: divert to the suspend check
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
new file mode 100644
index 0000000..ad26b36
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S

@@ -0,0 +1,30 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base; NOTE(review): ldrmi is a 32-bit ARM form
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset, sign-extended from 32 to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload thread flags before the possible suspend check
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative (backward) offset: go to the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
new file mode 100644
index 0000000..3f040e6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S

@@ -0,0 +1,39 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base; NOTE(review): ldrle is a 32-bit ARM form
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset, sign-extended from 32 to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload thread flags before the possible suspend check
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue  // offset <= 0 (backward or self branch): go to the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/arm64/op_if_eq.S b/runtime/interpreter/mterp/arm64/op_if_eq.S
new file mode 100644
index 0000000..aa4a0f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eq.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"eq" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
new file mode 100644
index 0000000..1d3202e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"eq" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_ge.S b/runtime/interpreter/mterp/arm64/op_if_ge.S
new file mode 100644
index 0000000..d6ec761
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ge.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ge" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
new file mode 100644
index 0000000..8e3abd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"ge" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_gt.S b/runtime/interpreter/mterp/arm64/op_if_gt.S
new file mode 100644
index 0000000..7db8e9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gt.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"gt" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
new file mode 100644
index 0000000..a4f2f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"gt" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_le.S b/runtime/interpreter/mterp/arm64/op_if_le.S
new file mode 100644
index 0000000..ca3a83f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_le.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"le" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
new file mode 100644
index 0000000..c1425fdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"le" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_lt.S b/runtime/interpreter/mterp/arm64/op_if_lt.S
new file mode 100644
index 0000000..56450a1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_lt.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"lt" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
new file mode 100644
index 0000000..03cd3d6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"lt" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_ne.S b/runtime/interpreter/mterp/arm64/op_if_ne.S
new file mode 100644
index 0000000..14d9e13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_ne.S

@@ -0,0 +1 @@
+%include "arm64/bincmp.S" { "condition":"ne" }

diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
new file mode 100644
index 0000000..21e1bc2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S

@@ -0,0 +1 @@
+%include "arm64/zcmp.S" { "condition":"ne" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
new file mode 100644
index 0000000..165c730
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget.S

@@ -0,0 +1,25 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       $helper
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- value at THREAD_EXCEPTION_OFFSET (non-zero means bail out)
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    .if $is_object
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean.S b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
new file mode 100644
index 0000000..36a9b6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode", "extend":"uxtb w0, w0" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S
new file mode 100644
index 0000000..2ceccb9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrb" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte.S b/runtime/interpreter/mterp/arm64/op_iget_byte.S
new file mode 100644
index 0000000..fd3f164
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetByteInstanceFromCode", "extend":"sxtb w0, w0" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S
new file mode 100644
index 0000000..6e97b72
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsb" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_char.S b/runtime/interpreter/mterp/arm64/op_iget_char.S
new file mode 100644
index 0000000..ea23275
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char.S

@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetCharInstanceFromCode", "extend":"uxth w0, w0" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_char_quick.S b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S
new file mode 100644
index 0000000..325dd1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_char_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrh" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_object.S b/runtime/interpreter/mterp/arm64/op_iget_object.S
new file mode 100644
index 0000000..03be78d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object.S

@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_object_quick.S b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S
new file mode 100644
index 0000000..e9a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_object_quick.S

@@ -0,0 +1,15 @@
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- 64-bit value at THREAD_EXCEPTION_OFFSET
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    PREFETCH_INST 2
+    cbnz    x3, MterpPossibleException      // bail out; test all 64 bits that were loaded
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
new file mode 100644
index 0000000..45c68a3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S

@@ -0,0 +1,15 @@
+%default { "load":"ldr", "extend":"" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    $load   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    $extend
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iget_short.S b/runtime/interpreter/mterp/arm64/op_iget_short.S
new file mode 100644
index 0000000..c347542
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short.S

@@ -0,0 +1 @@
+%include "arm64/op_iget.S" { "helper":"artGetShortInstanceFromCode", "extend":"sxth w0, w0" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_short_quick.S b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S
new file mode 100644
index 0000000..8367070
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_short_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iget_quick.S" { "load":"ldrsh" }

diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide.S b/runtime/interpreter/mterp/arm64/op_iget_wide.S
new file mode 100644
index 0000000..9718390
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide.S

@@ -0,0 +1,21 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- 64-bit value at THREAD_EXCEPTION_OFFSET
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    // cbnz tests the register directly, so no separate cmp is required
+    cbnz     x3, MterpException            // bail out; test all 64 bits that were loaded
+    SET_VREG_WIDE x0, w2                   // fp[A]<- x0 (64-bit result)
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
new file mode 100644
index 0000000..2480d2d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S

@@ -0,0 +1,12 @@
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject        // object was null
+    add     x4, x3, x4                  // create direct pointer
+    ldr     x0, [x4]                    // x0<- 64-bit field value
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG_WIDE x0, w2                // fp[A]<- x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
new file mode 100644
index 0000000..647bc75
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S

@@ -0,0 +1,23 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // x1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // x2<- method
+    mov       x3, xSELF                 // x3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x1<- value at THREAD_EXCEPTION_OFFSET
+    lsr       w2, wINST, #8             // w2<- A+
+    and       w2, w2, #15               // w2<- A
+    PREFETCH_INST 2
+    cbnz      x1, MterpException        // non-zero: take the exception path
+    ADVANCE 2                           // advance xPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_byte.S b/runtime/interpreter/mterp/arm64/op_int_to_byte.S
new file mode 100644
index 0000000..43f8148
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_byte.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxtb    w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_char.S b/runtime/interpreter/mterp/arm64/op_int_to_char.S
new file mode 100644
index 0000000..f092170
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_char.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"uxth    w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_double.S b/runtime/interpreter/mterp/arm64/op_int_to_double.S
new file mode 100644
index 0000000..3dee75a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_double.S

@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"scvtf d0, w0", "srcreg":"w0", "tgtreg":"d0"}

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_float.S b/runtime/interpreter/mterp/arm64/op_int_to_float.S
new file mode 100644
index 0000000..3ebbdc7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_float.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrow.S" {"instr":"scvtf s0, w0", "srcreg":"w0", "tgtreg":"s0"}

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
new file mode 100644
index 0000000..13d2120
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S

@@ -0,0 +1 @@
+%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}

diff --git a/runtime/interpreter/mterp/arm64/op_int_to_short.S b/runtime/interpreter/mterp/arm64/op_int_to_short.S
new file mode 100644
index 0000000..87fb804
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_int_to_short.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sxth    w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct.S b/runtime/interpreter/mterp/arm64/op_invoke_direct.S
new file mode 100644
index 0000000..c117232
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirect" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S
new file mode 100644
index 0000000..efc54c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_direct_range.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeDirectRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface.S b/runtime/interpreter/mterp/arm64/op_invoke_interface.S
new file mode 100644
index 0000000..12dfa59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface.S

@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S
new file mode 100644
index 0000000..61caaf4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_interface_range.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeInterfaceRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static.S b/runtime/interpreter/mterp/arm64/op_invoke_static.S
new file mode 100644
index 0000000..634eda2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static.S

@@ -0,0 +1,2 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStatic" }
+

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_static_range.S b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S
new file mode 100644
index 0000000..32cdcdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_static_range.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeStaticRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super.S b/runtime/interpreter/mterp/arm64/op_invoke_super.S
new file mode 100644
index 0000000..def2c55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super.S

@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_super_range.S b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S
new file mode 100644
index 0000000..27fb859
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_super_range.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeSuperRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S
new file mode 100644
index 0000000..66d0502
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual.S

@@ -0,0 +1,8 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..4300c34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_quick.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuick" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S
new file mode 100644
index 0000000..b43955c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..90c7b65
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_invoke_virtual_range_quick.S

@@ -0,0 +1 @@
+%include "arm64/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput.S b/runtime/interpreter/mterp/arm64/op_iput.S
new file mode 100644
index 0000000..a8c0e61
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput.S

@@ -0,0 +1,21 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field//CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cbnz     w0, MterpPossibleException // non-zero return: possible exception
+    ADVANCE  2                          // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean.S b/runtime/interpreter/mterp/arm64/op_iput_boolean.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_boolean_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte.S b/runtime/interpreter/mterp/arm64/op_iput_byte.S
new file mode 100644
index 0000000..bbf5319
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet8InstanceFromMterp" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S
new file mode 100644
index 0000000..25c61d7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_byte_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strb" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_char.S b/runtime/interpreter/mterp/arm64/op_iput_char.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char.S

@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_char_quick.S b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_char_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_object.S b/runtime/interpreter/mterp/arm64/op_iput_object.S
new file mode 100644
index 0000000..37a649b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object.S

@@ -0,0 +1,10 @@
+    EXPORT_PC                           // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    add     x0, xFP, #OFF_FP_SHADOWFRAME // x0<- xFP + OFF_FP_SHADOWFRAME (1st helper argument)
+    mov     x1, xPC                     // x1<- xPC (2nd helper argument)
+    mov     w2, wINST                   // w2<- wINST (3rd helper argument)
+    mov     x3, xSELF                   // x3<- xSELF (4th helper argument)
+    bl      MterpIputObject             // w0<- helper result
+    cbz     w0, MterpException          // result == 0: branch to MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iput_object_quick.S b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S
new file mode 100644
index 0000000..6fbf2b1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_object_quick.S

@@ -0,0 +1,9 @@
+    EXPORT_PC                           // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    add     x0, xFP, #OFF_FP_SHADOWFRAME // x0<- xFP + OFF_FP_SHADOWFRAME (1st helper argument)
+    mov     x1, xPC                     // x1<- xPC (2nd helper argument)
+    mov     w2, wINST                   // w2<- wINST (3rd helper argument)
+    bl      MterpIputObjectQuick        // w0<- helper result
+    cbz     w0, MterpException          // result == 0: branch to MterpException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
new file mode 100644
index 0000000..2afc51b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S

@@ -0,0 +1,14 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     w3, #0                      // check object for null; NOTE(review): looks redundant with the cbz below -- confirm
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $store     w0, [x3, x1]             // obj.field<- w0 (str/strb/strh, per the "store" parameter)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iput_short.S b/runtime/interpreter/mterp/arm64/op_iput_short.S
new file mode 100644
index 0000000..150d879
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short.S

@@ -0,0 +1 @@
+%include "arm64/op_iput.S" { "handler":"artSet16InstanceFromMterp" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_short_quick.S b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S
new file mode 100644
index 0000000..c6ef46a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_short_quick.S

@@ -0,0 +1 @@
+%include "arm64/op_iput_quick.S" { "store":"strh" }

diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide.S b/runtime/interpreter/mterp/arm64/op_iput_wide.S
new file mode 100644
index 0000000..4ce9525
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide.S

@@ -0,0 +1,15 @@
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC                           // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    add      x2, xFP, x2, lsl #2        // x2<- &fp[A] (address of the 64-bit source value)
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2                     // NOTE(review): presumably loads the next instruction early; rPC itself moves at ADVANCE below -- confirm
+    bl       artSet64InstanceFromMterp  // w0<- helper result (w0 = field ref, w1 = object, x2 = &fp[A], x3 = referrer)
+    cbnz     w0, MterpPossibleException // nonzero result: branch to MterpPossibleException
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
new file mode 100644
index 0000000..27b5dc5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S

@@ -0,0 +1,13 @@
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cmp     w2, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    add     x1, x2, x3                  // x1<- object pointer + field byte offset (direct pointer)
+    str     x0, [x1]                    // obj.field<- x0 (64-bit store)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_long_to_double.S b/runtime/interpreter/mterp/arm64/op_long_to_double.S
new file mode 100644
index 0000000..a3f59c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_double.S

@@ -0,0 +1 @@
+%include "arm64/funopWide.S" {"instr":"scvtf d0, x0", "srcreg":"x0", "tgtreg":"d0"}

diff --git a/runtime/interpreter/mterp/arm64/op_long_to_float.S b/runtime/interpreter/mterp/arm64/op_long_to_float.S
new file mode 100644
index 0000000..e9c9145
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_float.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"scvtf s0, x0", "srcreg":"x0", "tgtreg":"s0"}

diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
new file mode 100644
index 0000000..360a69b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S

@@ -0,0 +1 @@
+%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_monitor_enter.S b/runtime/interpreter/mterp/arm64/op_monitor_enter.S
new file mode 100644
index 0000000..6fbd9ae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_enter.S

@@ -0,0 +1,13 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC                            // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // x1<- self
+    bl       artLockObjectFromCode       // w0<- result of the lock call (w0 = object, x1 = self)
+    cbnz     w0, MterpException          // nonzero result: branch to MterpException
+    FETCH_ADVANCE_INST 1                 // advance rPC, load rINST
+    GET_INST_OPCODE ip                   // extract opcode from rINST
+    GOTO_OPCODE ip                       // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_monitor_exit.S b/runtime/interpreter/mterp/arm64/op_monitor_exit.S
new file mode 100644
index 0000000..26e2d8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_monitor_exit.S

@@ -0,0 +1,17 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC                           // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // x1<- self
+    bl       artUnlockObjectFromCode    // w0<- result of the unlock call (w0 = object, x1 = self)
+    cbnz     w0, MterpException         // nonzero result: branch to MterpException (NOTE(review): rPC is not yet advanced here -- confirm against the header comment)
+    FETCH_ADVANCE_INST 1                // no exception: advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move.S b/runtime/interpreter/mterp/arm64/op_move.S
new file mode 100644
index 0000000..195b7eb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move.S

@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move, move-object */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // w1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // w0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[A]<- w2 (object variant)
+    .else
+    SET_VREG w2, w0                     // fp[A]<- w2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_16.S b/runtime/interpreter/mterp/arm64/op_move_16.S
new file mode 100644
index 0000000..5146e3d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_16.S

@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance rPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2 (object variant)
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_exception.S b/runtime/interpreter/mterp/arm64/op_move_exception.S
new file mode 100644
index 0000000..b29298f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_exception.S

@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- exception slot of the current thread
+    mov     x1, #0                      // x1<- 0 (written back below to clear the slot)
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_from16.S b/runtime/interpreter/mterp/arm64/op_move_from16.S
new file mode 100644
index 0000000..78f344d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_from16.S

@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // w1<- BBBB
+    lsr     w0, wINST, #8               // w0<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- w2 (object variant)
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_object.S b/runtime/interpreter/mterp/arm64/op_move_object.S
new file mode 100644
index 0000000..a5adc59
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object.S

@@ -0,0 +1 @@
+%include "arm64/op_move.S" {"is_object":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_move_object_16.S b/runtime/interpreter/mterp/arm64/op_move_object_16.S
new file mode 100644
index 0000000..ef86c45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_16.S

@@ -0,0 +1 @@
+%include "arm64/op_move_16.S" {"is_object":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_move_object_from16.S b/runtime/interpreter/mterp/arm64/op_move_object_from16.S
new file mode 100644
index 0000000..0c73b3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_object_from16.S

@@ -0,0 +1 @@
+%include "arm64/op_move_from16.S" {"is_object":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_move_result.S b/runtime/interpreter/mterp/arm64/op_move_result.S
new file mode 100644
index 0000000..06fe962
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result.S

@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // x0<- pointer to result JType.
+    ldr     w0, [x0]                    // w0<- result.i (32-bit load through the pointer)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if $is_object
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- w0 (object variant; w1 passed as the third macro argument)
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_result_object.S b/runtime/interpreter/mterp/arm64/op_move_result_object.S
new file mode 100644
index 0000000..da2bbee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_object.S

@@ -0,0 +1 @@
+%include "arm64/op_move_result.S" {"is_object":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_move_result_wide.S b/runtime/interpreter/mterp/arm64/op_move_result_wide.S
new file mode 100644
index 0000000..f90a33f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_result_wide.S

@@ -0,0 +1,9 @@
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // x0<- pointer to result JType.
+    ldr     x0, [x0]                    // x0<- 64-bit result value (load through the pointer)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, x2                // fp[AA]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_wide.S b/runtime/interpreter/mterp/arm64/op_move_wide.S
new file mode 100644
index 0000000..538f079
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide.S

@@ -0,0 +1,9 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3               // x3<- fp[B] (whole 64-bit value read before any store, so overlap is safe)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2               // fp[A]<- x3
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_16.S b/runtime/interpreter/mterp/arm64/op_move_wide_16.S
new file mode 100644
index 0000000..c79cdc50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_16.S

@@ -0,0 +1,9 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3                // x3<- fp[BBBB] (whole 64-bit value read before any store, so overlap is safe)
+    FETCH_ADVANCE_INST 3                // advance rPC, load rINST
+    SET_VREG_WIDE x3, w2                // fp[AAAA]<- x3
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_move_wide_from16.S b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S
new file mode 100644
index 0000000..70dbe99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_move_wide_from16.S

@@ -0,0 +1,9 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3                // x3<- fp[BBBB] (whole 64-bit value read before any store, so overlap is safe)
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2                // fp[AA]<- x3
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_mul_double.S b/runtime/interpreter/mterp/arm64/op_mul_double.S
new file mode 100644
index 0000000..8d35b81
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fmul d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S
new file mode 100644
index 0000000..526cb3b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_double_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fmul     d0, d0, d1", "r0":"d0", "r1":"d1"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_float.S b/runtime/interpreter/mterp/arm64/op_mul_float.S
new file mode 100644
index 0000000..eea7733
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float.S

@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fmul   s0, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S
new file mode 100644
index 0000000..c1f2376
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_float_2addr.S

@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fmul   s2, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_int.S b/runtime/interpreter/mterp/arm64/op_mul_int.S
new file mode 100644
index 0000000..d14cae1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int.S

@@ -0,0 +1,2 @@
+/* operand order "mul w0, w1, w0" kept; NOTE(review): the "w0, w0, w1 is illegal" rule is presumably a 32-bit ARM leftover (op_mul_long_2addr uses "mul x0, x0, x1") -- confirm */
+%include "arm64/binop.S" {"instr":"mul     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S
new file mode 100644
index 0000000..f079118
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_2addr.S

@@ -0,0 +1,2 @@
+/* operand order "mul w0, w1, w0" kept; NOTE(review): the "w0, w0, w1 is illegal" rule is presumably a 32-bit ARM leftover (op_mul_long_2addr uses "mul x0, x0, x1") -- confirm */
+%include "arm64/binop2addr.S" {"instr":"mul     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S
new file mode 100644
index 0000000..a378559
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit16.S

@@ -0,0 +1,2 @@
+/* operand order "mul w0, w1, w0" kept; NOTE(review): the "w0, w0, w1 is illegal" rule is presumably a 32-bit ARM leftover (op_mul_long_2addr uses "mul x0, x0, x1") -- confirm */
+%include "arm64/binopLit16.S" {"instr":"mul     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S
new file mode 100644
index 0000000..b3d4014
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_int_lit8.S

@@ -0,0 +1,2 @@
+/* operand order "mul w0, w1, w0" kept; NOTE(review): the "w0, w0, w1 is illegal" rule is presumably a 32-bit ARM leftover (op_mul_long_2addr uses "mul x0, x0, x1") -- confirm */
+%include "arm64/binopLit8.S" {"instr":"mul     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_long.S b/runtime/interpreter/mterp/arm64/op_mul_long.S
new file mode 100644
index 0000000..bc0dcbd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"mul x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S
new file mode 100644
index 0000000..fa1cdf8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_mul_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"mul     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
new file mode 100644
index 0000000..e9064c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S

@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
new file mode 100644
index 0000000..49d51af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add     w0, w0, w4"}

diff --git a/runtime/interpreter/mterp/arm64/op_neg_int.S b/runtime/interpreter/mterp/arm64/op_neg_int.S
new file mode 100644
index 0000000..59c14a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_int.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"sub     w0, wzr, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_neg_long.S b/runtime/interpreter/mterp/arm64/op_neg_long.S
new file mode 100644
index 0000000..0c71ea7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_neg_long.S

@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"sub x0, xzr, x0"}

diff --git a/runtime/interpreter/mterp/arm64/op_new_array.S b/runtime/interpreter/mterp/arm64/op_new_array.S
new file mode 100644
index 0000000..886120a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_array.S

@@ -0,0 +1,18 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC                           // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    add     x0, xFP, #OFF_FP_SHADOWFRAME // x0<- xFP + OFF_FP_SHADOWFRAME (1st helper argument)
+    mov     x1, xPC                     // x1<- xPC (2nd helper argument)
+    mov     w2, wINST                   // w2<- wINST (3rd helper argument)
+    mov     x3, xSELF                   // x3<- xSELF (4th helper argument)
+    bl      MterpNewArray               // w0<- helper result
+    cbz     w0, MterpPossibleException  // result == 0: branch to MterpPossibleException
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_new_instance.S b/runtime/interpreter/mterp/arm64/op_new_instance.S
new file mode 100644
index 0000000..c171ac5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_new_instance.S

@@ -0,0 +1,13 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC                          // NOTE(review): presumably publishes the dex PC before the runtime call -- confirm
+    add     x0, xFP, #OFF_FP_SHADOWFRAME // x0<- shadow_frame
+    mov     x1, xSELF                  // x1<- self
+    mov     w2, wINST                  // w2<- inst_data
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException // result == 0: branch to MterpPossibleException
+    FETCH_ADVANCE_INST 2               // advance rPC, load rINST
+    GET_INST_OPCODE ip                 // extract opcode from rINST
+    GOTO_OPCODE ip                     // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_nop.S b/runtime/interpreter/mterp/arm64/op_nop.S
new file mode 100644
index 0000000..80c2d45
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_nop.S

@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                // nop does no work: just advance to next instr, load rINST
+    GET_INST_OPCODE ip                  // ip<- opcode from rINST
+    GOTO_OPCODE ip                      // execute it

diff --git a/runtime/interpreter/mterp/arm64/op_not_int.S b/runtime/interpreter/mterp/arm64/op_not_int.S
new file mode 100644
index 0000000..55d7750
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_int.S

@@ -0,0 +1 @@
+%include "arm64/unop.S" {"instr":"mvn     w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_not_long.S b/runtime/interpreter/mterp/arm64/op_not_long.S
new file mode 100644
index 0000000..e5ebdd6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_not_long.S

@@ -0,0 +1 @@
+%include "arm64/unopWide.S" {"instr":"mvn     x0, x0"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_int.S b/runtime/interpreter/mterp/arm64/op_or_int.S
new file mode 100644
index 0000000..648c1e6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"orr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_int_2addr.S b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S
new file mode 100644
index 0000000..abdf599
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"orr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit16.S b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S
new file mode 100644
index 0000000..db7f4ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"orr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_int_lit8.S b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S
new file mode 100644
index 0000000..51675f8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"orr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_long.S b/runtime/interpreter/mterp/arm64/op_or_long.S
new file mode 100644
index 0000000..dd137ce
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"orr x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_or_long_2addr.S b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S
new file mode 100644
index 0000000..f785230
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_or_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"orr     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
new file mode 100644
index 0000000..39ab8bf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S

@@ -0,0 +1,47 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBBBBB (signed 32-bit code-unit offset to the switch data) */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      $func                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base (NOTE(review): this branch still uses 32-bit ARM forms such as ldrle/rPC; presumably never assembled -- confirm)
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, sxtw #1        // x0<- PC + sign-extended BBBBbbbb*2 (sxtw keeps negative offsets correct)
+    bl      $func                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31            // xINST<- sign-extended 32-bit branch offset
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow_frame
+    mov     x2, xINST                   // x2<- offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // nonzero result: branch to MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET] // w7<- thread flags (NOTE(review): presumably consumed by MterpCheckSuspendAndContinue -- confirm)
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    b.le    MterpCheckSuspendAndContinue // offset <= 0 (flags from the adds above): go check for suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/arm64/op_rem_double.S b/runtime/interpreter/mterp/arm64/op_rem_double.S
new file mode 100644
index 0000000..c631ddb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double.S

@@ -0,0 +1,13 @@
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod                            // d0<- fmod(vBB, vCC)
+    lsr     w4, wINST, #8               // w4<- AA (decoded after the call, while wINST still holds this instruction)
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */

diff --git a/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S
new file mode 100644
index 0000000..db18aa7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_double_2addr.S

@@ -0,0 +1,12 @@
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    bl fmod                             // d0<- fmod(vA, vB)
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call); must precede the advance below
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST (only after A was re-decoded from this instruction)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */

diff --git a/runtime/interpreter/mterp/arm64/op_rem_float.S b/runtime/interpreter/mterp/arm64/op_rem_float.S
new file mode 100644
index 0000000..73f7060
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float.S

@@ -0,0 +1,2 @@
+/* float remainder is computed by calling libm's fmodf through the generic fbinop.S template */
+%include "arm64/fbinop.S" {"instr":"bl      fmodf"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
new file mode 100644
index 0000000..0b91891
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S

@@ -0,0 +1,13 @@
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w9, wINST, #8               // w9<- A+
+    and     w9, w9, #15                 // w9<- A
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    bl  fmodf                           // s0<- fmodf(vA, vB)
+    lsr     w9, wINST, #8               // w9<- A+ (re-decoded after the call, before wINST is reloaded)
+    and     w9, w9, #15                 // w9<- A
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9                     // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_rem_int.S b/runtime/interpreter/mterp/arm64/op_rem_int.S
new file mode 100644
index 0000000..dd9dfda
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S
new file mode 100644
index 0000000..57fc4971
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"sdiv     w2, w0, w1", "instr":"msub w0, w2, w1, w0", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S
new file mode 100644
index 0000000..b51a739
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S
new file mode 100644
index 0000000..03ea324
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"sdiv w3, w0, w1", "instr":"msub w0, w3, w1, w0", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_long.S b/runtime/interpreter/mterp/arm64/op_rem_long.S
new file mode 100644
index 0000000..f133f86
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"preinstr":"sdiv x3, x1, x2","instr":"msub x0, x3, x2, x1", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S
new file mode 100644
index 0000000..b45e2a9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rem_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"preinstr":"sdiv x3, x0, x1", "instr":"msub x0, x3, x1, x0", "chkzero":"1"}

diff --git a/runtime/interpreter/mterp/arm64/op_return.S b/runtime/interpreter/mterp/arm64/op_return.S
new file mode 100644
index 0000000..28630ee
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return.S

@@ -0,0 +1,19 @@
+    /*
+     * Return a 32-bit value.  Runs a suspend check first if a suspend or
+     * checkpoint request flag is set, then hands vAA to MterpReturn in w0.
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check           // a request flag is set: do the suspend check first
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return          // then continue with the normal return path

diff --git a/runtime/interpreter/mterp/arm64/op_return_object.S b/runtime/interpreter/mterp/arm64/op_return_object.S
new file mode 100644
index 0000000..b6cb532
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_object.S

@@ -0,0 +1 @@
+%include "arm64/op_return.S"

diff --git a/runtime/interpreter/mterp/arm64/op_return_void.S b/runtime/interpreter/mterp/arm64/op_return_void.S
new file mode 100644
index 0000000..3a5aa56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void.S

@@ -0,0 +1,12 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor      // return-void: fence helper is called before returning
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check           // a request flag is set: do the suspend check first
+.L${opcode}_return:
+    mov     x0, #0                      // x0<- 0 before branching to MterpReturn
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return          // then continue with the normal return path

diff --git a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
new file mode 100644
index 0000000..1e06953
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S

@@ -0,0 +1,10 @@
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // return-void-no-barrier: w7<- thread flags (no fence helper call here)
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check           // a request flag is set: do the suspend check first
+.L${opcode}_return:
+    mov     x0, #0                      // x0<- 0 before branching to MterpReturn
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return          // then continue with the normal return path

diff --git a/runtime/interpreter/mterp/arm64/op_return_wide.S b/runtime/interpreter/mterp/arm64/op_return_wide.S
new file mode 100644
index 0000000..c6e1d9d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_return_wide.S

@@ -0,0 +1,18 @@
+    /*
+     * Return a 64-bit value: vAA is handed to MterpReturn in x0.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L${opcode}_check           // a request flag is set: do the suspend check first
+.L${opcode}_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.L${opcode}_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .L${opcode}_return          // then continue with the normal return path

diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int.S b/runtime/interpreter/mterp/arm64/op_rsub_int.S
new file mode 100644
index 0000000..3bf45fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int.S

@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm64/binopLit16.S" {"instr":"sub     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S
new file mode 100644
index 0000000..7a3572b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_rsub_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"sub     w0, w1, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget.S b/runtime/interpreter/mterp/arm64/op_sget.S
new file mode 100644
index 0000000..6352ce0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget.S

@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+    /*
+     * General SGET handler wrapper: calls $helper(field ref, method, self),
+     * applies the optional $extend to w0, then stores w0 into vAA.
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field//BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- value stored at OFF_FP_METHOD in the frame
+    mov   x2, xSELF                     // x2<- self
+    bl    $helper
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET] // x3<- pending exception (non-zero if the helper failed)
+    lsr   w2, wINST, #8                 // w2<- AA
+    $extend                             // optional width fix-up of the w0 result
+    PREFETCH_INST 2                     // load the next instruction without advancing yet
+    cbnz  x3, MterpException            // bail out
+.if $is_object
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // past the exception point: now advance
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_sget_boolean.S b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
new file mode 100644
index 0000000..c40dbdd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"uxtb w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget_byte.S b/runtime/interpreter/mterp/arm64/op_sget_byte.S
new file mode 100644
index 0000000..6cf69a3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"sxtb w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget_char.S b/runtime/interpreter/mterp/arm64/op_sget_char.S
new file mode 100644
index 0000000..8924a34
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_char.S

@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"uxth w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget_object.S b/runtime/interpreter/mterp/arm64/op_sget_object.S
new file mode 100644
index 0000000..620b0ba
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_object.S

@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget_short.S b/runtime/interpreter/mterp/arm64/op_sget_short.S
new file mode 100644
index 0000000..19dbba6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_short.S

@@ -0,0 +1 @@
+%include "arm64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"sxth w0, w0"}

diff --git a/runtime/interpreter/mterp/arm64/op_sget_wide.S b/runtime/interpreter/mterp/arm64/op_sget_wide.S
new file mode 100644
index 0000000..287f66d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sget_wide.S

@@ -0,0 +1,19 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     * Calls artGet64StaticFromCode(field ref, method, self) and stores x0 into vAA.
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- value stored at OFF_FP_METHOD in the frame
+    mov   x2, xSELF                     // x2<- self
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET] // x3<- pending exception (non-zero if the helper failed)
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w4                // vAA<- x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
new file mode 100644
index 0000000..bd0f237
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
new file mode 100644
index 0000000..b4671d2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
new file mode 100644
index 0000000..4dd32e0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shl_long.S b/runtime/interpreter/mterp/arm64/op_shl_long.S
new file mode 100644
index 0000000..bbf9600
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsl"}

diff --git a/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S
new file mode 100644
index 0000000..a5c4013
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shl_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsl"}

diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
new file mode 100644
index 0000000..c214a18
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
new file mode 100644
index 0000000..3c1484b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
new file mode 100644
index 0000000..26d5024
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_shr_long.S b/runtime/interpreter/mterp/arm64/op_shr_long.S
new file mode 100644
index 0000000..4d33235
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"asr"}

diff --git a/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S
new file mode 100644
index 0000000..0a4a386
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_shr_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"asr"}

diff --git a/runtime/interpreter/mterp/arm64/op_sparse_switch.S b/runtime/interpreter/mterp/arm64/op_sparse_switch.S
new file mode 100644
index 0000000..5a8d748
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sparse_switch.S

@@ -0,0 +1 @@
+%include "arm64/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }

diff --git a/runtime/interpreter/mterp/arm64/op_sput.S b/runtime/interpreter/mterp/arm64/op_sput.S
new file mode 100644
index 0000000..75f27ab
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput.S

@@ -0,0 +1,19 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper: calls $helper(field ref, value, method, self).
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- value stored at OFF_FP_METHOD in the frame
+    mov     x3, xSELF                   // x3<- self (AA in w3 is no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      $helper
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_sput_boolean.S b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_boolean.S

@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}

diff --git a/runtime/interpreter/mterp/arm64/op_sput_byte.S b/runtime/interpreter/mterp/arm64/op_sput_byte.S
new file mode 100644
index 0000000..11c55e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_byte.S

@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}

diff --git a/runtime/interpreter/mterp/arm64/op_sput_char.S b/runtime/interpreter/mterp/arm64/op_sput_char.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_char.S

@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}

diff --git a/runtime/interpreter/mterp/arm64/op_sput_object.S b/runtime/interpreter/mterp/arm64/op_sput_object.S
new file mode 100644
index 0000000..c176da2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_object.S

@@ -0,0 +1,10 @@
+    EXPORT_PC                           // sput-object: the store itself is delegated to MterpSputObject
+    add     x0, xFP, #OFF_FP_SHADOWFRAME        // x0<- xFP + OFF_FP_SHADOWFRAME
+    mov     x1, xPC                     // x1<- current PC
+    mov     x2, xINST                   // x2<- current instruction word
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpSputObject
+    cbz     w0, MterpException          // a zero result takes the exception path
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_sput_short.S b/runtime/interpreter/mterp/arm64/op_sput_short.S
new file mode 100644
index 0000000..b4dd5aa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_short.S

@@ -0,0 +1 @@
+%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}

diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
new file mode 100644
index 0000000..1d034ec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S

@@ -0,0 +1,18 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     * Passes the helper the address xFP + (AA << 2) instead of the 64-bit value.
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]   // x1<- value stored at OFF_FP_METHOD in the frame
+    lsr     w2, wINST, #8               // w2<- AA
+    add     x2, xFP, w2, lsl #2         // x2<- xFP + (AA << 2), i.e. &fp[AA]
+    mov     x3, xSELF                   // x3<- self
+    PREFETCH_INST 2                     // Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction

diff --git a/runtime/interpreter/mterp/arm64/op_sub_double.S b/runtime/interpreter/mterp/arm64/op_sub_double.S
new file mode 100644
index 0000000..e8e3401
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"fsub d0, d1, d2", "result":"d0", "r1":"d1", "r2":"d2"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S
new file mode 100644
index 0000000..ddab55e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_double_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"fsub     d0, d0, d1", "r0":"d0", "r1":"d1"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_float.S b/runtime/interpreter/mterp/arm64/op_sub_float.S
new file mode 100644
index 0000000..227b15f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float.S

@@ -0,0 +1 @@
+%include "arm64/fbinop.S" {"instr":"fsub   s0, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S
new file mode 100644
index 0000000..19ac8d5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_float_2addr.S

@@ -0,0 +1 @@
+%include "arm64/fbinop2addr.S" {"instr":"fsub   s2, s0, s1"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_int.S b/runtime/interpreter/mterp/arm64/op_sub_int.S
new file mode 100644
index 0000000..0e7ce0e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"sub     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S
new file mode 100644
index 0000000..d2c1bd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"sub     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_long.S b/runtime/interpreter/mterp/arm64/op_sub_long.S
new file mode 100644
index 0000000..263c70d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"sub x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S
new file mode 100644
index 0000000..5be3772
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_sub_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"sub     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/op_throw.S b/runtime/interpreter/mterp/arm64/op_throw.S
new file mode 100644
index 0000000..9a951af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_throw.S

@@ -0,0 +1,10 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w1, w2                      // w1<- vAA (exception object)
+    cbz      w1, common_errNullObject    // a null reference goes to the null-object error path
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException

diff --git a/runtime/interpreter/mterp/arm64/op_unused_3e.S b/runtime/interpreter/mterp/arm64/op_unused_3e.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3e.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_3f.S b/runtime/interpreter/mterp/arm64/op_unused_3f.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_3f.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_40.S b/runtime/interpreter/mterp/arm64/op_unused_40.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_40.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_41.S b/runtime/interpreter/mterp/arm64/op_unused_41.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_41.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_42.S b/runtime/interpreter/mterp/arm64/op_unused_42.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_42.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_43.S b/runtime/interpreter/mterp/arm64/op_unused_43.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_43.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_73.S b/runtime/interpreter/mterp/arm64/op_unused_73.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_73.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_79.S b/runtime/interpreter/mterp/arm64/op_unused_79.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_79.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_7a.S b/runtime/interpreter/mterp/arm64/op_unused_7a.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_7a.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f3.S b/runtime/interpreter/mterp/arm64/op_unused_f3.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f3.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f4.S b/runtime/interpreter/mterp/arm64/op_unused_f4.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f4.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f5.S b/runtime/interpreter/mterp/arm64/op_unused_f5.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f5.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f6.S b/runtime/interpreter/mterp/arm64/op_unused_f6.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f6.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f7.S b/runtime/interpreter/mterp/arm64/op_unused_f7.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f7.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f8.S b/runtime/interpreter/mterp/arm64/op_unused_f8.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f8.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_f9.S b/runtime/interpreter/mterp/arm64/op_unused_f9.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_f9.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_fa.S b/runtime/interpreter/mterp/arm64/op_unused_fa.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fa.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_fb.S b/runtime/interpreter/mterp/arm64/op_unused_fb.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fb.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_fc.S b/runtime/interpreter/mterp/arm64/op_unused_fc.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fc.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_fd.S b/runtime/interpreter/mterp/arm64/op_unused_fd.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fd.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_fe.S b/runtime/interpreter/mterp/arm64/op_unused_fe.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_fe.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_unused_ff.S b/runtime/interpreter/mterp/arm64/op_unused_ff.S
new file mode 100644
index 0000000..204ecef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_unused_ff.S

@@ -0,0 +1 @@
+%include "arm64/unused.S"

diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
new file mode 100644
index 0000000..bb8382b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
new file mode 100644
index 0000000..dbccb99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
new file mode 100644
index 0000000..35090c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long.S b/runtime/interpreter/mterp/arm64/op_ushr_long.S
new file mode 100644
index 0000000..e13c86a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide.S" {"opcode":"lsr"}

diff --git a/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S
new file mode 100644
index 0000000..67ec91e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_ushr_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/shiftWide2addr.S" {"opcode":"lsr"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_int.S b/runtime/interpreter/mterp/arm64/op_xor_int.S
new file mode 100644
index 0000000..7483663
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int.S

@@ -0,0 +1 @@
+%include "arm64/binop.S" {"instr":"eor     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S
new file mode 100644
index 0000000..2f9a2c7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binop2addr.S" {"instr":"eor     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S
new file mode 100644
index 0000000..6b72c56
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit16.S

@@ -0,0 +1 @@
+%include "arm64/binopLit16.S" {"instr":"eor     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S
new file mode 100644
index 0000000..6d187b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_int_lit8.S

@@ -0,0 +1 @@
+%include "arm64/binopLit8.S" {"instr":"eor     w0, w0, w1"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_long.S b/runtime/interpreter/mterp/arm64/op_xor_long.S
new file mode 100644
index 0000000..3880d5d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long.S

@@ -0,0 +1 @@
+%include "arm64/binopWide.S" {"instr":"eor x0, x1, x2"}

diff --git a/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S
new file mode 100644
index 0000000..3690552
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/op_xor_long_2addr.S

@@ -0,0 +1 @@
+%include "arm64/binopWide2addr.S" {"instr":"eor     x0, x0, x1"}

diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
new file mode 100644
index 0000000..6306fca
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S

@@ -0,0 +1,20 @@
+%default {"opcode":"lsl"}
+    /*
+     * 64-bit shift operation.  "opcode" is the A64 register-shift mnemonic
+     * (lsl, asr or lsr); it defaults to lsl, matching shiftWide2addr.S.
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // Mask low 6 bits of the shift count.
+    $opcode  x0, x1, x2                 // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */

diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
new file mode 100644
index 0000000..77d104a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S

@@ -0,0 +1,16 @@
+%default {"opcode":"lsl"}
+    /*
+     * Generic 64-bit shift operation: vA<- vA "opcode" (vB & 63).
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    and     x1, x1, #63                 // Mask low 6 bits.
+    $opcode x0, x0, x1                  // Do the shift.
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */

diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
new file mode 100644
index 0000000..474a961
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unop.S

@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * A is held in w9 across "instr", so "instr" must leave w9 intact.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    $preinstr                           // optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $instr                              // w0<- op, w0-w3 changed
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */

diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
new file mode 100644
index 0000000..109302a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unopWide.S

@@ -0,0 +1,18 @@
+%default {"instr":"sub x0, xzr, x0", "preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3                // x0<- vB
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    $preinstr                           // optional op (empty by default)
+    $instr                              // x0<- op (default: x0<- 0 - x0)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */

diff --git a/runtime/interpreter/mterp/arm64/unused.S b/runtime/interpreter/mterp/arm64/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/unused.S

@@ -0,0 +1,4 @@
+/*
+ * Unused opcode: bail to the reference interpreter (MterpFallback) to throw.
+ */
+  b MterpFallback

diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
new file mode 100644
index 0000000..e28668b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm64/zcmp.S

@@ -0,0 +1,40 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment.  In the #else (non-MTERP_SUSPEND) path the branch is taken
+     * when (vAA condition 0) holds, e.g. for "if-lez" you would use "le".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA; NOTE(review): ARM32-style operand form - confirm this path still assembles
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vAA, 0)
+    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken; NOTE(review): condition is used in the opposite sense of the #else path - confirm
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset (code units) if not taken
+    cmp     w2, #0                      // compare (vAA, 0)
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // arg0: self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // arg1: shadow_frame
+    sbfm    x2, xINST, 0, 31            // arg2: offset, sign-extended from 32 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative (backward) offset: check for suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif

diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index ef3c721..57206d2 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64

@@ -1,3 +1,4 @@
+
 # Copyright (C) 2015 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,262 +37,262 @@
     # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
     # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
 
-    op op_nop FALLBACK
-    op op_move FALLBACK
-    op op_move_from16 FALLBACK
-    op op_move_16 FALLBACK
-    op op_move_wide FALLBACK
-    op op_move_wide_from16 FALLBACK
-    op op_move_wide_16 FALLBACK
-    op op_move_object FALLBACK
-    op op_move_object_from16 FALLBACK
-    op op_move_object_16 FALLBACK
-    op op_move_result FALLBACK
-    op op_move_result_wide FALLBACK
-    op op_move_result_object FALLBACK
-    op op_move_exception FALLBACK
-    op op_return_void FALLBACK
-    op op_return FALLBACK
-    op op_return_wide FALLBACK
-    op op_return_object FALLBACK
-    op op_const_4 FALLBACK
-    op op_const_16 FALLBACK
-    op op_const FALLBACK
-    op op_const_high16 FALLBACK
-    op op_const_wide_16 FALLBACK
-    op op_const_wide_32 FALLBACK
-    op op_const_wide FALLBACK
-    op op_const_wide_high16 FALLBACK
-    op op_const_string FALLBACK
-    op op_const_string_jumbo FALLBACK
-    op op_const_class FALLBACK
-    op op_monitor_enter FALLBACK
-    op op_monitor_exit FALLBACK
-    op op_check_cast FALLBACK
-    op op_instance_of FALLBACK
-    op op_array_length FALLBACK
-    op op_new_instance FALLBACK
-    op op_new_array FALLBACK
-    op op_filled_new_array FALLBACK
-    op op_filled_new_array_range FALLBACK
-    op op_fill_array_data FALLBACK
-    op op_throw FALLBACK
-    op op_goto FALLBACK
-    op op_goto_16 FALLBACK
-    op op_goto_32 FALLBACK
-    op op_packed_switch FALLBACK
-    op op_sparse_switch FALLBACK
-    op op_cmpl_float FALLBACK
-    op op_cmpg_float FALLBACK
-    op op_cmpl_double FALLBACK
-    op op_cmpg_double FALLBACK
-    op op_cmp_long FALLBACK
-    op op_if_eq FALLBACK
-    op op_if_ne FALLBACK
-    op op_if_lt FALLBACK
-    op op_if_ge FALLBACK
-    op op_if_gt FALLBACK
-    op op_if_le FALLBACK
-    op op_if_eqz FALLBACK
-    op op_if_nez FALLBACK
-    op op_if_ltz FALLBACK
-    op op_if_gez FALLBACK
-    op op_if_gtz FALLBACK
-    op op_if_lez FALLBACK
-    op_unused_3e FALLBACK
-    op_unused_3f FALLBACK
-    op_unused_40 FALLBACK
-    op_unused_41 FALLBACK
-    op_unused_42 FALLBACK
-    op_unused_43 FALLBACK
-    op op_aget FALLBACK
-    op op_aget_wide FALLBACK
-    op op_aget_object FALLBACK
-    op op_aget_boolean FALLBACK
-    op op_aget_byte FALLBACK
-    op op_aget_char FALLBACK
-    op op_aget_short FALLBACK
-    op op_aput FALLBACK
-    op op_aput_wide FALLBACK
-    op op_aput_object FALLBACK
-    op op_aput_boolean FALLBACK
-    op op_aput_byte FALLBACK
-    op op_aput_char FALLBACK
-    op op_aput_short FALLBACK
-    op op_iget FALLBACK
-    op op_iget_wide FALLBACK
-    op op_iget_object FALLBACK
-    op op_iget_boolean FALLBACK
-    op op_iget_byte FALLBACK
-    op op_iget_char FALLBACK
-    op op_iget_short FALLBACK
-    op op_iput FALLBACK
-    op op_iput_wide FALLBACK
-    op op_iput_object FALLBACK
-    op op_iput_boolean FALLBACK
-    op op_iput_byte FALLBACK
-    op op_iput_char FALLBACK
-    op op_iput_short FALLBACK
-    op op_sget FALLBACK
-    op op_sget_wide FALLBACK
-    op op_sget_object FALLBACK
-    op op_sget_boolean FALLBACK
-    op op_sget_byte FALLBACK
-    op op_sget_char FALLBACK
-    op op_sget_short FALLBACK
-    op op_sput FALLBACK
-    op op_sput_wide FALLBACK
-    op op_sput_object FALLBACK
-    op op_sput_boolean FALLBACK
-    op op_sput_byte FALLBACK
-    op op_sput_char FALLBACK
-    op op_sput_short FALLBACK
-    op op_invoke_virtual FALLBACK
-    op op_invoke_super FALLBACK
-    op op_invoke_direct FALLBACK
-    op op_invoke_static FALLBACK
-    op op_invoke_interface FALLBACK
-    op op_return_void_no_barrier FALLBACK
-    op op_invoke_virtual_range FALLBACK
-    op op_invoke_super_range FALLBACK
-    op op_invoke_direct_range FALLBACK
-    op op_invoke_static_range FALLBACK
-    op op_invoke_interface_range FALLBACK
-    op_unused_79 FALLBACK
-    op_unused_7a FALLBACK
-    op op_neg_int FALLBACK
-    op op_not_int FALLBACK
-    op op_neg_long FALLBACK
-    op op_not_long FALLBACK
-    op op_neg_float FALLBACK
-    op op_neg_double FALLBACK
-    op op_int_to_long FALLBACK
-    op op_int_to_float FALLBACK
-    op op_int_to_double FALLBACK
-    op op_long_to_int FALLBACK
-    op op_long_to_float FALLBACK
-    op op_long_to_double FALLBACK
-    op op_float_to_int FALLBACK
-    op op_float_to_long FALLBACK
-    op op_float_to_double FALLBACK
-    op op_double_to_int FALLBACK
-    op op_double_to_long FALLBACK
-    op op_double_to_float FALLBACK
-    op op_int_to_byte FALLBACK
-    op op_int_to_char FALLBACK
-    op op_int_to_short FALLBACK
-    op op_add_int FALLBACK
-    op op_sub_int FALLBACK
-    op op_mul_int FALLBACK
-    op op_div_int FALLBACK
-    op op_rem_int FALLBACK
-    op op_and_int FALLBACK
-    op op_or_int FALLBACK
-    op op_xor_int FALLBACK
-    op op_shl_int FALLBACK
-    op op_shr_int FALLBACK
-    op op_ushr_int FALLBACK
-    op op_add_long FALLBACK
-    op op_sub_long FALLBACK
-    op op_mul_long FALLBACK
-    op op_div_long FALLBACK
-    op op_rem_long FALLBACK
-    op op_and_long FALLBACK
-    op op_or_long FALLBACK
-    op op_xor_long FALLBACK
-    op op_shl_long FALLBACK
-    op op_shr_long FALLBACK
-    op op_ushr_long FALLBACK
-    op op_add_float FALLBACK
-    op op_sub_float FALLBACK
-    op op_mul_float FALLBACK
-    op op_div_float FALLBACK
-    op op_rem_float FALLBACK
-    op op_add_double FALLBACK
-    op op_sub_double FALLBACK
-    op op_mul_double FALLBACK
-    op op_div_double FALLBACK
-    op op_rem_double FALLBACK
-    op op_add_int_2addr FALLBACK
-    op op_sub_int_2addr FALLBACK
-    op op_mul_int_2addr FALLBACK
-    op op_div_int_2addr FALLBACK
-    op op_rem_int_2addr FALLBACK
-    op op_and_int_2addr FALLBACK
-    op op_or_int_2addr FALLBACK
-    op op_xor_int_2addr FALLBACK
-    op op_shl_int_2addr FALLBACK
-    op op_shr_int_2addr FALLBACK
-    op op_ushr_int_2addr FALLBACK
-    op op_add_long_2addr FALLBACK
-    op op_sub_long_2addr FALLBACK
-    op op_mul_long_2addr FALLBACK
-    op op_div_long_2addr FALLBACK
-    op op_rem_long_2addr FALLBACK
-    op op_and_long_2addr FALLBACK
-    op op_or_long_2addr FALLBACK
-    op op_xor_long_2addr FALLBACK
-    op op_shl_long_2addr FALLBACK
-    op op_shr_long_2addr FALLBACK
-    op op_ushr_long_2addr FALLBACK
-    op op_add_float_2addr FALLBACK
-    op op_sub_float_2addr FALLBACK
-    op op_mul_float_2addr FALLBACK
-    op op_div_float_2addr FALLBACK
-    op op_rem_float_2addr FALLBACK
-    op op_add_double_2addr FALLBACK
-    op op_sub_double_2addr FALLBACK
-    op op_mul_double_2addr FALLBACK
-    op op_div_double_2addr FALLBACK
-    op op_rem_double_2addr FALLBACK
-    op op_add_int_lit16 FALLBACK
-    op op_rsub_int FALLBACK
-    op op_mul_int_lit16 FALLBACK
-    op op_div_int_lit16 FALLBACK
-    op op_rem_int_lit16 FALLBACK
-    op op_and_int_lit16 FALLBACK
-    op op_or_int_lit16 FALLBACK
-    op op_xor_int_lit16 FALLBACK
-    op op_add_int_lit8 FALLBACK
-    op op_rsub_int_lit8 FALLBACK
-    op op_mul_int_lit8 FALLBACK
-    op op_div_int_lit8 FALLBACK
-    op op_rem_int_lit8 FALLBACK
-    op op_and_int_lit8 FALLBACK
-    op op_or_int_lit8 FALLBACK
-    op op_xor_int_lit8 FALLBACK
-    op op_shl_int_lit8 FALLBACK
-    op op_shr_int_lit8 FALLBACK
-    op op_ushr_int_lit8 FALLBACK
-    op op_iget_quick FALLBACK
-    op op_iget_wide_quick FALLBACK
-    op op_iget_object_quick FALLBACK
-    op op_iput_quick FALLBACK
-    op op_iput_wide_quick FALLBACK
-    op op_iput_object_quick FALLBACK
-    op op_invoke_virtual_quick FALLBACK
-    op op_invoke_virtual_range_quick FALLBACK
-    op op_iput_boolean_quick FALLBACK
-    op op_iput_byte_quick FALLBACK
-    op op_iput_char_quick FALLBACK
-    op op_iput_short_quick FALLBACK
-    op op_iget_boolean_quick FALLBACK
-    op op_iget_byte_quick FALLBACK
-    op op_iget_char_quick FALLBACK
-    op op_iget_short_quick FALLBACK
-    op_unused_f3 FALLBACK
-    op_unused_f4 FALLBACK
-    op_unused_f5 FALLBACK
-    op_unused_f6 FALLBACK
-    op_unused_f7 FALLBACK
-    op_unused_f8 FALLBACK
-    op_unused_f9 FALLBACK
-    op_unused_fa FALLBACK
-    op_unused_fb FALLBACK
-    op_unused_fc FALLBACK
-    op_unused_fd FALLBACK
-    op_unused_fe FALLBACK
-    op_unused_ff FALLBACK
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
 op-end
 
 # common subroutines for asm

diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 0afd276..3e2a222 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc

@@ -488,6 +488,14 @@
             << self->IsExceptionPending();
 }
 
+extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);  // Only the shadow frame and the offset are logged.
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());  // Current inst.
+  uint16_t inst_data = inst->Fetch16(0);  // Code unit 0, passed to Opcode() below.
+  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
+}
+
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
@@ -618,5 +626,14 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (offset <= 0) {  // Only zero or negative (non-forward) offsets are reported.
+    const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+    instrumentation->Branch(self, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), offset);
+  }
+  return false;  // TBD - return true if need to trigger on-stack replacement.
+}
+
 }  // namespace interpreter
 }  // namespace art

diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 78c784b..9091b6f 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S

@@ -92,6 +92,8 @@
  */
 #include "asm_support.h"
 
+#define MTERP_PROFILE_BRANCHES 1  /* enables the "#if MTERP_PROFILE_BRANCHES" MterpProfileBranch call sequences */
+
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -1100,10 +1102,18 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
-    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset (code units)
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags (the cmp above overwrote the movs flags)
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
@@ -1130,9 +1140,17 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset (code units)
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1167,9 +1185,17 @@
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset (code units)
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
-    adds    r1, r0, r0                  @ r1<- byte offset
+    adds    r1, rINST, rINST            @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1211,8 +1237,17 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0                   @ rINST<- code-unit branch offset returned in r0
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset (code units)
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1255,8 +1290,17 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0                   @ rINST<- code-unit branch offset returned in r0
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF                   @ arg0: self
+    add     r1, rFP, #OFF_FP_SHADOWFRAME  @ arg1: shadow_frame
+    mov     r2, rINST                   @ arg2: branch offset (code units)
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1493,10 +1537,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movne rINST, #2              @ rINST<- inst branch dist (code units) for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset); NOTE(review): bl overwrites lr (thread flags were loaded into lr above) - confirm
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1536,10 +1588,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    moveq rINST, #2              @ rINST<- inst branch dist (code units) for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset); NOTE(review): bl overwrites lr (thread flags were loaded into lr above) - confirm
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1579,10 +1639,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movge rINST, #2              @ rINST<- inst branch dist (code units) for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset); NOTE(review): bl overwrites lr (thread flags were loaded into lr above) - confirm
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1622,10 +1690,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movlt rINST, #2              @ rINST<- inst branch dist (code units) for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset); NOTE(review): bl overwrites lr (thread flags were loaded into lr above) - confirm
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1665,10 +1741,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movle rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1708,10 +1792,18 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
-    adds    r2, r1, r1                  @ convert to bytes, check sign
+    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1746,11 +1838,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movne rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1785,11 +1885,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    moveq rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1824,11 +1932,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movge rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1863,11 +1979,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movlt rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1902,11 +2026,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movle rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1941,11 +2073,19 @@
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt r1, #2                 @ r1<- inst branch dist for not-taken
-    adds    r1, r1, r1                  @ convert to bytes & set flags
+    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]  @ thread flags; must follow the bl above, which overwrites lr
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -12177,6 +12317,18 @@
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /*
+ * On-stack replacement pending.
+ * Branch offset in rINST on entry.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov r0, rSELF                       @ arg0: self
+    add r1, rFP, #OFF_FP_SHADOWFRAME    @ arg1: shadow frame
+    mov r2, rINST                       @ arg2: branch offset
+    bl MterpLogOSR                      @ (self, shadow_frame, offset)
+#endif
+    b MterpFallback                     @ Let the reference interpreter deal with it.
+/*
  * Bail out to reference interpreter.
  */
 MterpFallback:

diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
new file mode 100644
index 0000000..220041f
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S

@@ -0,0 +1,11862 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm64'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm64/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke, allows higher-level code to create frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat xFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via xFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM64 Runtime register usage conventions.
+
+  r0     : w0 is 32-bit return register and x0 is 64-bit.
+  r0-r7  : Argument registers.
+  r8-r15 : Caller save registers (used as temporary registers).
+  r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
+           the linker, by the trampolines and other stubs (the backend uses
+           these as temporary registers).
+  r18    : Caller save register (used as temporary register).
+  r19    : Pointer to thread-local storage.
+  r20-r29: Callee save registers.
+  r30    : (lr) is reserved (the link register).
+  rsp    : (sp) is reserved (the stack pointer).
+  rzr    : (zr) is reserved (the zero register).
+
+  Floating-point registers
+  v0-v31
+
+  v0     : s0 is return register for singles (32-bit) and d0 for doubles (64-bit).
+           This is analogous to the C/C++ (hard-float) calling convention.
+  v0-v7  : Floating-point argument registers in both Dalvik and C/C++ conventions.
+           Also used as temporary and codegen scratch registers.
+
+  v0-v7 and v16-v31 : trashed across C calls.
+  v8-v15 : bottom 64-bits preserved across C calls (d8-d15 are preserved).
+
+  v16-v31: Used as codegen temp/scratch.
+  v8-v15 : Can be used for promotion.
+
+  Must maintain 16-byte stack alignment.
+
+Mterp notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  x20  xPC       interpreted program counter, used for fetching instructions
+  x21  xFP       interpreted frame pointer, used for accessing locals and args
+  x22  xSELF     self (Thread) pointer
+  x23  xINST     first 16-bit code unit of current instruction
+  x24  xIBASE    interpreted instruction base pointer, used for computed goto
+  x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x16  ip        scratch reg
+  x17  ip2       scratch reg (used by macros)
+
+Macros are provided for common operations.  They MUST NOT alter unspecified registers or condition
+codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+#define MTERP_PROFILE_BRANCHES 1
+
+/* During bringup, we'll use the shadow frame model instead of xFP */
+/* single-purpose registers, given names for clarity */
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to the dex_pc_ptr slot in the shadow frame, for the benefit of future
+ * exception objects.  Must be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+/*
+ * Fetch the 16-bit code unit at xPC into wINST (zero-extended).  Does not advance xPC.
+ */
+.macro FETCH_INST
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances xPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]!    // pre-index with writeback: xPC += count*2, then load
+.endm
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to xPC and wINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!  // writeback: _sreg is advanced by count*2 bytes
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update xPC.  Used to load
+ * wINST ahead of possible exception point.  Be sure to manually advance xPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    wINST, [xPC, #((\count)*2)]     // plain offset load: xPC is left unchanged
+.endm
+
+/* Advance xPC by "_count" 16-bit code units (count*2 bytes) without fetching anything. */
+.macro ADVANCE count
+  add  xPC, xPC, #((\count)*2)
+.endm
+
+/*
+ * Advance xPC by the offset held in _reg, then fetch the instruction it now points to
+ * into wINST.  "_reg" must hold the distance in bytes, *not* 16-bit code units; it is
+ * sign-extended from 32 bits (sxtw), so it may be negative.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    add     xPC, xPC, \reg, sxtw
+    ldrh    wINST, [xPC]
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [xPC, #((\count)*2)]      // zero-extending load
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [xPC, #((\count)*2)]      // sign-extending load
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).  The byte is zero-extended into _reg.
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [xPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field (the low 8 bits of xINST) into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, xINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field (low 8 bits of _ireg) into _oreg.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Clobbers _reg: it is overwritten with the handler address.
+ */
+
+.macro GOTO_OPCODE reg
+    add     \reg, xIBASE, \reg, lsl #7  // handler address = xIBASE + opcode * 128
+    br      \reg
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     \reg, \base, \reg, lsl #7   // same, but relative to an explicit table base
+    br      \reg
+.endm
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.  "_vreg" is a W register holding the index.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [xFP, \vreg, uxtw #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     wzr, [xREFS, \vreg, uxtw #2]    // non-reference store: zero the matching refs slot
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [xFP, \vreg, uxtw #2]
+    str     \reg, [xREFS, \vreg, uxtw #2]   // reference store: mirror into refs; tmpreg is unused
+.endm
+
+/*
+ * Get/set the 64-bit value from a Dalvik register.  Both macros clobber ip2.
+ * TUNING: can we do better here?
+ */
+.macro GET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2         // ip2<- &fp[vreg]
+    ldr     \reg, [ip2]
+.endm
+.macro SET_VREG_WIDE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2         // ip2<- &fp[vreg]
+    str     \reg, [ip2]
+    add     ip2, xREFS, \vreg, lsl #2       // ip2<- &refs[vreg]
+    str     xzr, [ip2]                      // zero both 32-bit refs slots of the pair
+.endm
+
+/*
+ * Convert a virtual register index into an address: _reg <- xFP + (_vreg << 2).
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table: reload xIBASE from the thread's current_ibase slot.
+ */
+.macro REFRESH_IBASE
+  ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm64/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    .text
+
+/*
+ * Interpreter entry point.
+ * On entry:
+ *  x0  Thread* self
+ *  x1  code_item
+ *  x2  ShadowFrame
+ *  x3  JValue* result_register
+ *
+ */
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+    .balign 16
+
+ExecuteMterpImpl:
+    .cfi_startproc
+    stp     xIBASE, xREFS, [sp, #-64]!  // allocate a 64-byte frame; save the registers we repurpose
+    stp     xSELF, xINST, [sp, #16]
+    stp     xPC, xFP, [sp, #32]
+    stp     fp, lr, [sp, #48]
+    add     fp, sp, #48                 // fp<- address of the saved fp/lr pair
+
+    /* Remember the return register */
+    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     xSELF, x0
+    ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to the vreg array in the shadow frame
+    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
+    ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
+    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
+    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at xPC */
+    FETCH_INST                          // load wINST from xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop     // handlers start at the opcode 0x00 (nop) slot
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm64/op_nop.S */
+    FETCH_ADVANCE_INST 1                // advance to next instr, load wINST
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    GOTO_OPCODE ip                      // execute it
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // w1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // w0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[A]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- w2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // w1<- BBBB
+    lsr     w0, wINST, #8               // w0<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm64/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE  x3, w3               // x3<- fp[B] (64 bits), read in full before the store
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE  x3, w2               // fp[A]<- x3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm64/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 1                         // w3<- BBBB
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x3, w3                // x3<- fp[BBBB] (64 bits)
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x3, w2                // fp[AA]<- x3
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm64/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH w3, 2                         // w3<- BBBB
+    FETCH w2, 1                         // w2<- AAAA
+    GET_VREG_WIDE x3, w3                // x3<- fp[BBBB] (64 bits)
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    SET_VREG_WIDE x3, w2                // fp[AAAA]<- x3
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm64/op_move_object.S */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // w1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // w0<- A from 11:8
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 1                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[A]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- w2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm64/op_move_object_from16.S */
+/* File: arm64/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH w1, 1                         // w1<- BBBB
+    lsr     w0, wINST, #8               // w0<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[AA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm64/op_move_object_16.S */
+/* File: arm64/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH w1, 2                         // w1<- BBBB
+    FETCH w0, 1                         // w0<- AAAA
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_VREG w2, w1                     // w2<- fp[BBBB]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w2, w0              // fp[AAAA]<- w2
+    .else
+    SET_VREG w2, w0                     // fp[AAAA]<- w2
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // x0<- result-register pointer saved at entry
+    ldr     w0, [x0]                    // w0<- low 32 bits of the result
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 0                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- w0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm64/op_move_result_wide.S */
+    /* for: move-result-wide */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // x0<- result-register pointer saved at entry
+    ldr     x0, [x0]                    // x0<- full 64-bit result
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, x2                // fp[AA]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm64/op_move_result_object.S */
+/* File: arm64/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    ldr     x0, [xFP, #OFF_FP_RESULT_REGISTER]  // x0<- result-register pointer saved at entry
+    ldr     w0, [x0]                    // w0<- low 32 bits of the result
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    .if 1                               // template flag: 1 selects the object (reference) store
+    SET_VREG_OBJECT w0, w2, w1          // fp[AA]<- w0
+    .else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+    .endif
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm64/op_move_exception.S */
+    /* move-exception vAA */
+    lsr     w2, wINST, #8               // w2<- AA
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]   // x3<- thread's exception slot
+    mov     x1, #0                      // x1<- 0
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    SET_VREG_OBJECT w3, w2              // fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // clear exception
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm64/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self, the argument if MterpSuspendCheck is called
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_check      // a suspend or checkpoint request is pending
+.Lop_return_void_return:
+    mov     x0, #0                      // x0<- 0 before branching to MterpReturn
+    b       MterpReturn
+.Lop_return_void_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]   // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self, the argument if MterpSuspendCheck is called
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_check           // a suspend or checkpoint request is pending
+.Lop_return_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA
+    b       MterpReturn
+.Lop_return_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm64/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     *
+     * Same sequence as the 32-bit return handlers, except that the wide
+     * register pair starting at vAA is loaded into x0 before branching to
+     * MterpReturn.
+     */
+    /* return-wide vAA */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)  // suspend/checkpoint requested?
+    b.ne    .Lop_return_wide_check      // yes: run the suspend check first
+.Lop_return_wide_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG_WIDE x0, w2                // x0<- vAA
+    b       MterpReturn
+.Lop_return_wide_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_wide_return     // then load vAA and return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm64/op_return_object.S */
+/* File: arm64/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * Calls MterpThreadFenceForConstructor, runs MterpSuspendCheck when a
+     * suspend or checkpoint request is flagged on the thread, then branches
+     * to MterpReturn with vAA in w0.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self (argument for MterpSuspendCheck)
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)  // suspend/checkpoint requested?
+    b.ne    .Lop_return_object_check    // yes: run the suspend check first
+.Lop_return_object_return:
+    lsr     w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA
+    b       MterpReturn
+.Lop_return_object_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_object_return   // then load vAA and return
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm64/op_const_4.S */
+    /* const/4 vA, #+B
+     *
+     * Stores the sign-extended 4-bit literal B into vA.  B sits in the
+     * top nibble of the 16-bit instruction word, so it is shifted up to
+     * bit 31 and then arithmetically shifted back down.
+     */
+    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    SET_VREG w1, w0                     // fp[A]<- w1
+    GOTO_OPCODE ip                      // execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm64/op_const_16.S */
+    /* const/16 vAA, #+BBBB
+     *
+     * Stores the sign-extended 16-bit literal from the second code unit
+     * into vAA.
+     */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm64/op_const.S */
+    /* const vAA, #+BBBBbbbb
+     *
+     * Assembles a 32-bit literal from two 16-bit code units and stores it
+     * into vAA.
+     */
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm64/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000
+     *
+     * Stores the 16-bit literal, shifted into the upper half of a 32-bit
+     * word, into vAA.
+     */
+    FETCH   w0, 1                       // w0<- 0000BBBB (zero-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    lsl     w0, w0, #16                 // w0<- BBBB0000
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG w0, w3                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm64/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB
+     *
+     * Sign-extends the 16-bit literal to 64 bits and stores it as a wide
+     * value at vAA.
+     */
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    sbfm    x0, x0, 0, 31               // x0<- w0 sign-extended to 64 bits
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm64/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb
+     *
+     * Assembles a 32-bit literal from two code units, sign-extends it to
+     * 64 bits and stores it as a wide value at vAA.
+     */
+    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    lsr     w3, wINST, #8               // w3<- AA
+    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    sbfm    x0, x0, 0, 31               // x0<- w0 sign-extended to 64 bits
+    SET_VREG_WIDE x0, w3                // vAA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm64/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb
+     *
+     * Assembles a 64-bit literal from four 16-bit code units and stores
+     * it as a wide value at vAA.
+     */
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w1, 2                         // w1<- BBBB (low middle)
+    FETCH w2, 3                         // w2<- hhhh (high middle)
+    FETCH w3, 4                         // w3<- HHHH (high)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 5                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    orr     w0, w0, w1, lsl #16         // w0<-         BBBBbbbb
+    orr     x0, x0, x2, lsl #32         // x0<-     hhhhBBBBbbbb
+    orr     x0, x0, x3, lsl #48         // x0<- HHHHhhhhBBBBbbbb
+    SET_VREG_WIDE x0, w4                // vAA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm64/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000
+     *
+     * Stores the 16-bit literal, shifted into the top 16 bits of a 64-bit
+     * word, as a wide value at vAA.
+     */
+    FETCH w0, 1                         // w0<- 0000BBBB (zero-extended)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    lsl     x0, x0, #48                 // x0<- BBBB000000000000
+    SET_VREG_WIDE x0, w1                // vAA<- x0 (wide)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm64/op_const_string.S */
+    /* const/string vAA, String//BBBB
+     *
+     * Passes the string index and the target register number to
+     * MterpConstString.  A nonzero result branches to
+     * MterpPossibleException; otherwise execution continues with the
+     * next instruction.
+     */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME  // x2<- shadow frame
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz    w0, MterpPossibleException  // let reference interpreter deal with it.
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/op_const_string_jumbo.S */
+    /* const-string/jumbo vAA, String//BBBBbbbb
+     *
+     * Same as const/string, but the string index is 32 bits wide and is
+     * assembled from two code units before the call to MterpConstString.
+     */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (low)
+    FETCH w2, 2                         // w2<- BBBB (high)
+    lsr     w1, wINST, #8               // w1<- AA
+    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
+    add     x2, xFP, #OFF_FP_SHADOWFRAME  // x2<- shadow frame
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstString            // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     // load wINST
+    cbnz    w0, MterpPossibleException      // let reference interpreter deal with it.
+    ADVANCE 3                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm64/op_const_class.S */
+    /* const/class vAA, Class//BBBB
+     *
+     * Passes the class index and the target register number to
+     * MterpConstClass.  A nonzero result branches to
+     * MterpPossibleException; otherwise execution continues with the
+     * next instruction.
+     */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- BBBB
+    lsr     w1, wINST, #8               // w1<- AA
+    add     x2, xFP, #OFF_FP_SHADOWFRAME  // x2<- shadow frame
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpConstClass             // (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz    w0, MterpPossibleException  // nonzero result: take the exception path
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm64/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     *
+     * Calls artLockObjectFromCode with the object in vAA and the current
+     * thread; a nonzero result branches to MterpException.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w0, w2                      // w0<- vAA (object)
+    mov      x1, xSELF                   // x1<- self
+    bl       artLockObjectFromCode       // (obj, self)
+    cbnz     w0, MterpException          // nonzero result: take the exception path
+    FETCH_ADVANCE_INST 1                 // advance xPC, load wINST
+    GET_INST_OPCODE ip                   // extract opcode from wINST
+    GOTO_OPCODE ip                       // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm64/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     *
+     * NOTE(review): as written, the branch to MterpException is taken
+     * before xPC is advanced, which does not obviously match the
+     * paragraph above - confirm which behaviour is intended.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8              // w2<- AA
+    GET_VREG w0, w2                     // w0<- vAA (object)
+    mov      x1, xSELF                  // x1<- self
+    bl       artUnlockObjectFromCode    // (obj, self); w0<- 0 on success
+    cbnz     w0, MterpException         // nonzero result: take the exception path
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm64/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     *
+     * The check itself is done by MterpCheckCast, which receives the
+     * class index, the address of the vreg holding the object, the
+     * current method and the thread.  A nonzero result branches to
+     * MterpPossibleException.
+     */
+    /* check-cast vAA, class//BBBB */
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- BBBB
+    lsr      w1, wINST, #8              // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1           // x1<- &object
+    ldr      x2, [xFP, #OFF_FP_METHOD]  // x2<- method
+    mov      x3, xSELF                  // x3<- self
+    bl       MterpCheckCast             // (index, &obj, method, self)
+    PREFETCH_INST 2                     // load wINST
+    cbnz     w0, MterpPossibleException // nonzero result: take the exception path
+    ADVANCE  2                          // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm64/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     *
+     * The result of MterpInstanceOf is stored into vA.  Failure is
+     * detected by re-reading the thread's pending-exception slot after
+     * the call rather than from the return value.
+     */
+    /* instance-of vA, vB, class//CCCC */
+    EXPORT_PC
+    FETCH     w0, 1                     // w0<- CCCC
+    lsr       w1, wINST, #12            // w1<- B
+    VREG_INDEX_TO_ADDR x1, w1           // x1<- &object
+    ldr       x2, [xFP, #OFF_FP_METHOD] // x2<- method
+    mov       x3, xSELF                 // x3<- self
+    bl        MterpInstanceOf           // (index, &obj, method, self)
+    ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x1<- pending exception (if any)
+    lsr       w2, wINST, #8             // w2<- A+
+    and       w2, w2, #15               // w2<- A
+    PREFETCH_INST 2                     // load wINST
+    cbnz      x1, MterpException        // exception pending: take the exception path
+    ADVANCE 2                           // advance xPC
+    SET_VREG w0, w2                     // vA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm64/op_array_length.S */
+    /*
+     * Return the length of an array.
+     *
+     * array-length vA, vB: reads the length field of the array
+     * referenced by vB and stores it into vA.  A null reference in vB
+     * branches to common_errNullObject.
+     */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w0, w1                     // w0<- vB (object ref)
+    cbz     w0, common_errNullObject    // null array reference: fail
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- array length
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w3, w2                     // vA<- length
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm64/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     *
+     * All of the work is delegated to MterpNewInstance; a zero result
+     * branches to MterpPossibleException.
+     */
+    /* new-instance vAA, class//BBBB */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME  // x0<- shadow frame
+    mov     x1, xSELF                  // x1<- self
+    mov     w2, wINST                  // w2<- instruction word (inst_data)
+    bl      MterpNewInstance           // (shadow_frame, self, inst_data)
+    cbz     w0, MterpPossibleException // zero result: take the exception path
+    FETCH_ADVANCE_INST 2               // advance xPC, load wINST
+    GET_INST_OPCODE ip                 // extract opcode from wINST
+    GOTO_OPCODE ip                     // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm64/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     *
+     * All of the work is delegated to MterpNewArray; a zero result
+     * branches to MterpPossibleException.
+     */
+    /* new-array vA, vB, class//CCCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME  // x0<- shadow frame
+    mov     x1, xPC                     // x1<- address of this instruction
+    mov     w2, wINST                   // w2<- instruction word
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpNewArray               // (shadow_frame, pc, inst word, self)
+    cbz     w0, MterpPossibleException  // zero result: take the exception path
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * All of the work is delegated to MterpFilledNewArray; a zero result
+     * branches to MterpPossibleException.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME  // x0<- shadow frame
+    mov     x1, xPC                     // x1<- address of this instruction
+    mov     x2, xSELF                   // x2<- self
+    bl      MterpFilledNewArray         // (shadow_frame, pc, self)
+    cbz     w0, MterpPossibleException  // zero result: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/op_filled_new_array_range.S */
+/* File: arm64/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * All of the work is delegated to MterpFilledNewArrayRange; a zero
+     * result branches to MterpPossibleException.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class//CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type//BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME  // x0<- shadow frame
+    mov     x1, xPC                     // x1<- address of this instruction
+    mov     x2, xSELF                   // x2<- self
+    bl      MterpFilledNewArrayRange    // (shadow_frame, pc, self)
+    cbz     w0, MterpPossibleException  // zero result: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm64/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB
+     *
+     * Passes the array referenced by vAA and the address of the payload
+     * to MterpFillArrayData; a zero result branches to
+     * MterpPossibleException.
+     *
+     * The payload offset is a signed 32-bit count of 16-bit code units,
+     * so it must be sign-extended (sxtw) before it is doubled and added
+     * to the 64-bit xPC.  A zero-extending add would compute a wrong
+     * address for a negative offset.
+     */
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    GET_VREG w0, w3                     // w0<- vAA (array object)
+    add     x1, xPC, w1, sxtw #1        // x1<- PC + sext(BBBBbbbb)*2 (array data off.)
+    bl      MterpFillArrayData          // (obj, payload)
+    cbz     w0, MterpPossibleException      // exception?
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm64/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     *
+     * The reference in vAA is stored into the thread's pending-exception
+     * slot and control goes to MterpException.  A null reference in vAA
+     * branches to common_errNullObject instead.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    lsr      w2, wINST, #8               // w2<- AA
+    GET_VREG w1, w2                      // w1<- vAA (exception object)
+    cbz      w1, common_errNullObject    // null exception reference: fail
+    str      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm64/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * In the !MTERP_SUSPEND path the sign-extended offset is kept in
+     * wINST while MterpProfileBranch runs (when profiling is compiled
+     * in), and a negative (backward) offset is routed through
+     * MterpCheckSuspendAndContinue with the thread flags preloaded in w7.
+     *
+     * NOTE(review): the MTERP_SUSPEND path uses ARM32-style operands and
+     * a conditional load (ldrmi, rIBASE); it looks like leftover code
+     * that is only harmless while MTERP_SUSPEND is 0 - confirm.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended), set flags
+    add     w2, w1, w1                  // w2<- byte offset (flags come from movs)
+       // If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset sign-extended to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+       // Backward branch: continue via the suspend check
+    b.mi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm64/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * In the !MTERP_SUSPEND path the sign-extended offset is kept in
+     * wINST while MterpProfileBranch runs (when profiling is compiled
+     * in), and a negative (backward) offset is routed through
+     * MterpCheckSuspendAndContinue with the thread flags preloaded in w7.
+     *
+     * NOTE(review): the MTERP_SUSPEND path uses an ARM32-style
+     * conditional load (ldrmi); it looks like leftover code that is only
+     * harmless while MTERP_SUSPEND is 0 - confirm.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset sign-extended to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // backward branch: continue via the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm64/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     *
+     * In the !MTERP_SUSPEND path the offset is kept in wINST while
+     * MterpProfileBranch runs (when profiling is compiled in), and a
+     * zero or negative offset is routed through
+     * MterpCheckSuspendAndContinue with the thread flags preloaded in w7.
+     *
+     * NOTE(review): the MTERP_SUSPEND path uses an ARM32-style
+     * conditional load (ldrle); it looks like leftover code that is only
+     * harmless while MTERP_SUSPEND is 0 - confirm.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
+    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    sbfm    x2, xINST, 0, 31            // x2<- offset sign-extended to 64 bits
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue  // offset <= 0: continue via the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * The payload offset BBBBbbbb is a signed 32-bit count of 16-bit code
+     * units, so it must be sign-extended (sxtw) before it is doubled and
+     * added to the 64-bit xPC.  A zero-extending add would compute a
+     * wrong payload address for a negative offset.
+     *
+     * NOTE(review): the MTERP_SUSPEND path uses ARM32-style operands and
+     * a conditional load (ldrle, rPC, rIBASE); it looks like leftover
+     * code that is only harmless while MTERP_SUSPEND is 0 - confirm.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBbbbb */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, sxtw #1        // x0<- PC + sext(BBBBbbbb)*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31            // xINST<- branch offset sign-extended to 64 bits
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    mov     x2, xINST                   // x2<- branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue  // offset <= 0: continue via the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm64/op_sparse_switch.S */
+/* File: arm64/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * The payload offset BBBBbbbb is a signed 32-bit count of 16-bit code
+     * units, so it must be sign-extended (sxtw) before it is doubled and
+     * added to the 64-bit xPC.  A zero-extending add would compute a
+     * wrong payload address for a negative offset.
+     *
+     * NOTE(review): the MTERP_SUSPEND path uses ARM32-style operands and
+     * a conditional load (ldrle, rPC, rIBASE); it looks like leftover
+     * code that is only harmless while MTERP_SUSPEND is 0 - confirm.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBBbbbb */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    lsr     w3, wINST, #8               // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     x0, xPC, w0, sxtw #1        // x0<- PC + sext(BBBBbbbb)*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    sbfm    xINST, x0, 0, 31            // xINST<- branch offset sign-extended to 64 bits
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- shadow frame
+    mov     x2, xINST                   // x2<- branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue  // offset <= 0: continue via the suspend check
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm64/op_cmpl_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * This is the "l" flavour: the result starts as -1 and is kept for
+     * every outcome that satisfies "le".  An unordered compare (a NaN
+     * operand) also satisfies "le" on A64, so it yields -1.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2                     // s1<- vBB
+    GET_VREG s2, w3                     // s2<- vCC
+    mov     w0, #-1                     // w0<- -1 (provisional result)
+    fcmp s1, s2                         // compare vBB with vCC
+    csneg w0, w0, w0, le                // w0<- (le) ? -1 : 1
+    csel w0, wzr, w0, eq                // w0<- (eq) ? 0 : w0
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm64/op_cmpg_float.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * This is the "g" flavour: the result starts as 1 and is kept for
+     * every outcome that satisfies "pl" (N flag clear).  An unordered
+     * compare (a NaN operand) leaves N clear on A64, so it yields 1.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG s1, w2                     // s1<- vBB
+    GET_VREG s2, w3                     // s2<- vCC
+    mov     w0, #1                      // w0<- 1 (provisional result)
+    fcmp s1, s2                         // compare vBB with vCC
+    csneg w0, w0, w0, pl                // w0<- (pl) ? 1 : -1
+    csel w0, wzr, w0, eq                // w0<- (eq) ? 0 : w0
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm64/op_cmpl_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * This is the "l" flavour: the result starts as -1 and is kept for
+     * every outcome that satisfies "le".  An unordered compare (a NaN
+     * operand) also satisfies "le" on A64, so it yields -1.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2                // d1<- vBB (wide)
+    GET_VREG_WIDE d2, w3                // d2<- vCC (wide)
+    mov     w0, #-1                     // w0<- -1 (provisional result)
+    fcmp d1, d2                         // compare vBB with vCC
+    csneg w0, w0, w0, le                // w0<- (le) ? -1 : 1
+    csel w0, wzr, w0, eq                // w0<- (eq) ? 0 : w0
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm64/op_cmpg_double.S */
+/* File: arm64/fcmp.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * This is the "g" flavour: the result starts as 1 and is kept for
+     * every outcome that satisfies "pl" (N flag clear).  An unordered
+     * compare (a NaN operand) leaves N clear on A64, so it yields 1.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE d1, w2                // d1<- vBB (wide)
+    GET_VREG_WIDE d2, w3                // d2<- vCC (wide)
+    mov     w0, #1                      // w0<- 1 (provisional result)
+    fcmp d1, d2                         // compare vBB with vCC
+    csneg w0, w0, w0, pl                // w0<- (pl) ? 1 : -1
+    csel w0, wzr, w0, eq                // w0<- (eq) ? 0 : w0
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm64/op_cmp_long.S
+ *
+ * cmp-long vAA, vBB, vCC: signed 64-bit compare of vBB with vCC.
+ * vAA<- 0 if equal, 1 if vBB > vCC, -1 if vBB < vCC.
+ */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG_WIDE x1, w2                // x1<- vBB
+    GET_VREG_WIDE x2, w3                // x2<- vCC
+    cmp     x1, x2                      // set flags from vBB - vCC
+    csinc   w0, wzr, wzr, eq            // w0<- eq ? 0 : 1
+    csneg   w0, w0, w0, ge              // w0<- ge ? w0 : -w0 (-1 when vBB < vCC)
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG w0, w4                     // vAA<- w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm64/op_if_eq.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-eq: the branch is taken when vA == vB.
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, moveq, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, eq // wINST<- (vA == vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm64/op_if_ne.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-ne: the branch is taken when vA != vB.
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movne, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movne w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, ne // wINST<- (vA != vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm64/op_if_lt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-lt: the branch is taken when vA < vB (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movlt, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, lt // wINST<- (vA < vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm64/op_if_ge.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-ge: the branch is taken when vA >= vB (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movge, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movge w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, ge // wINST<- (vA >= vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm64/op_if_gt.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-gt: the branch is taken when vA > vB (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movgt, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, gt // wINST<- (vA > vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm64/op_if_le.S */
+/* File: arm64/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation, expanded here for
+     * if-le: the branch is taken when vA <= vB (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movle, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, w3                      // compare (vA, vB)
+    movle w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    GET_VREG w3, w1                     // w3<- vB
+    GET_VREG w2, w0                     // w2<- vA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
+    cmp     w2, w3                      // compare (vA, vB)
+    csel    wINST, w1, w0, le // wINST<- (vA <= vB) ? w1 : w0; result stashed in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi     MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm64/op_if_eqz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-eqz: the branch is taken when vAA == 0.
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, moveq, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    moveq w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, eq // wINST<- (vAA == 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm64/op_if_nez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-nez: the branch is taken when vAA != 0.
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movne, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movne w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, ne // wINST<- (vAA != 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm64/op_if_ltz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-ltz: the branch is taken when vAA < 0 (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movlt, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movlt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, lt // wINST<- (vAA < 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm64/op_if_gez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-gez: the branch is taken when vAA >= 0 (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movge, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movge w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, ge // wINST<- (vAA >= 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm64/op_if_gtz.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-gtz: the branch is taken when vAA > 0 (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movgt, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movgt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, gt // wINST<- (vAA > 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm64/op_if_lez.S */
+/* File: arm64/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation, expanded here for
+     * if-lez: the branch is taken when vAA <= 0 (signed 32-bit compare).
+     *
+     * In the #else path the condition given to csel is the branch
+     * condition itself (not a reversed one): wINST gets the branch offset
+     * read from the instruction stream when it holds, otherwise 2, the
+     * offset used when the branch is not taken.  A negative doubled offset
+     * (backward branch) goes to MterpCheckSuspendAndContinue.
+     *
+     * NOTE(review): the MTERP_SUSPEND path is written with A32-style forms
+     * (mov with shifted operand, movle, ldrmi) and substitutes 2 when the
+     * condition holds, which looks inverted relative to the #else path.
+     * Confirm before enabling MTERP_SUSPEND.
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    cmp     w2, #0                      // compare (vA, 0)
+    movle w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    lsr     w0, wINST, #8               // w0<- AA
+    GET_VREG w2, w0                     // w2<- vAA
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
+    cmp     w2, #0                      // compare (vA, 0)
+    csel    wINST, w1, w0, le // wINST<- (vAA <= 0) ? w1 : w0; result stashed in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME // x1<- shadow frame address
+    sbfm    x2, xINST, 0, 31            // x2<- sign-extended low 32 bits of xINST (branch offset)
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags (presumably read on the suspend-check path; confirm)
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue  // negative offset (N from adds, assuming the fetch macro keeps flags)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm64/op_unused_3e.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x3e is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm64/op_unused_3f.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x3f is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm64/op_unused_40.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x40 is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm64/op_unused_41.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x41 is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm64/op_unused_42.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x42 is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm64/op_unused_43.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0x43 is unused.  Leave this handler through MterpFallback so
+ * that the reference interpreter deals with the instruction and throws.
+ */
+  b MterpFallback                       // unconditional branch out of the handler slot
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * This expansion is the plain "aget" case: the index is scaled by 4
+     * and the element is read with a 32-bit ldr at
+     * MIRROR_INT_ARRAY_DATA_OFFSET.  A null array reference branches to
+     * common_errNullObject.  The index is compared against the array
+     * length as an unsigned value, so a negative index fails the same
+     * check as an index past the end and branches to common_errArrayIndex.
+     * The element address is computed before the bounds check, but the
+     * element itself is only loaded after the check has passed.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #2    // x0<- arrayObj + index*4 (index zero-extended)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldr   w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm64/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * The index is scaled by 8 and the element is read with a 64-bit ldr
+     * at MIRROR_WIDE_ARRAY_DATA_OFFSET.  A null array reference branches
+     * to common_errNullObject; an index that is not below the array
+     * length as an unsigned value branches to common_errArrayIndex.
+     *
+     * NOTE(review): "add x0, x0, w1, lsl #3" mixes a W index register
+     * with "lsl" in a 64-bit add, whereas op_aget spells the equivalent
+     * step "uxtw #2".  Confirm the assembler encodes this as a
+     * zero-extended, scaled index.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject        // bail if null array object
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, lsl #3          // x0<- arrayObj + index*8
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length, bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    ldr     x2, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // x2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x2, w4                // vAA<- x2 (64-bit)
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm64/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     *
+     * No null or bounds check is done inline here: the (array, index) pair
+     * is passed to artAGetObjectFromMterp, and the thread's pending
+     * exception slot is inspected after the call.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    FETCH_B w3, 1, 1                    // w3<- CC
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     // (array, index)
+    ldr      x1, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x1<- pending exception (64-bit)
+    lsr      w2, wINST, #8               // w2<- AA
+    PREFETCH_INST 2
+    cbnz     x1, MterpException          // test the whole 64-bit value loaded above
+    SET_VREG_OBJECT w0, w2              // vAA<- w0 (value returned by the call)
+    ADVANCE 2
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm64/op_aget_boolean.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aget-boolean, 0x47): byte element, index not scaled
+     * (uxtw #0), loaded with ldrb (zero-extending byte load).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // x0<- arrayObj + zero-extended index
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrb   w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm64/op_aget_byte.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aget-byte, 0x48): byte element, index not scaled
+     * (uxtw #0), loaded with ldrsb (sign-extending byte load).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // x0<- arrayObj + zero-extended index
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsb   w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm64/op_aget_char.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aget-char, 0x49): halfword element, index scaled by 2
+     * (uxtw #1), loaded with ldrh (zero-extending halfword load).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // x0<- arrayObj + (zero-extended index << 1)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrh   w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm64/op_aget_short.S */
+/* File: arm64/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aget-short, 0x4a): halfword element, index scaled by 2
+     * (uxtw #1), loaded with ldrsh (sign-extending halfword load).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     x0, common_errNullObject    // bail if null array object.
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // x0<- arrayObj + (zero-extended index << 1)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    ldrsh   w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // w2<- vBB[vCC]
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w2, w9                     // vAA<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aput, 0x4b): 32-bit element, index scaled by 4
+     * (uxtw #2), stored with str.  The element address is formed before the
+     * bounds check, but the store is only reached once the check has passed.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #2    // x0<- arrayObj + (zero-extended index << 2)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    str  w2, [x0, #MIRROR_INT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm64/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * The 32-bit index is zero-extended and scaled by 8 (uxtw #3) to form
+     * the element address.  The address is formed before the bounds check,
+     * but the 64-bit store is only reached once the check has passed.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    and     w2, w0, #255                // w2<- BB
+    lsr     w3, w0, #8                  // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]    // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #3         // x0<- arrayObj + (zero-extended index << 3)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    GET_VREG_WIDE x1, w4                // x1<- vAA/vAA+1 (index in w1 no longer needed)
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    str     x1, [x0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  // vBB[vCC]<- x1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm64/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     *
+     * Nothing is decoded inline; the handler passes the shadow frame
+     * address, the current PC and the raw instruction word to
+     * MterpAputObject and treats a zero result in w0 as a possible
+     * exception.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME    // x0<- xFP + OFF_FP_SHADOWFRAME
+    mov     x1, xPC                         // x1<- current dex PC pointer
+    mov     w2, wINST                       // w2<- instruction word
+    bl      MterpAputObject
+    cbz     w0, MterpPossibleException      // zero result: take the exception path
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm64/op_aput_boolean.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aput-boolean, 0x4e): byte element, index not scaled
+     * (uxtw #0), stored with strb (low byte of w2).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // x0<- arrayObj + zero-extended index
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm64/op_aput_byte.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aput-byte, 0x4f): byte element, index not scaled
+     * (uxtw #0), stored with strb (low byte of w2).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #0    // x0<- arrayObj + zero-extended index
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strb  w2, [x0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm64/op_aput_char.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aput-char, 0x50): halfword element, index scaled by 2
+     * (uxtw #1), stored with strh (low halfword of w2).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // x0<- arrayObj + (zero-extended index << 1)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm64/op_aput_short.S */
+/* File: arm64/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     *
+     * This expansion (aput-short, 0x51): halfword element, index scaled by 2
+     * (uxtw #1), stored with strh (low halfword of w2).
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B w2, 1, 0                    // w2<- BB
+    lsr     w9, wINST, #8               // w9<- AA
+    FETCH_B w3, 1, 1                    // w3<- CC
+    GET_VREG w0, w2                     // w0<- vBB (array object)
+    GET_VREG w1, w3                     // w1<- vCC (requested index)
+    cbz     w0, common_errNullObject    // bail if null
+    ldr     w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]     // w3<- arrayObj->length
+    add     x0, x0, w1, uxtw #1    // x0<- arrayObj + (zero-extended index << 1)
+    cmp     w1, w3                      // compare unsigned index, length
+    bcs     common_errArrayIndex        // index >= length (unsigned), bail
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_VREG w2, w9                     // w2<- vAA
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    strh  w2, [x0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     // vBB[vCC]<- w2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget, 0x52) calls artGet32InstanceFromCode with
+     * w0 = field ref, w1 = object reference, x2 = referrer, x3 = self,
+     * then checks the thread's pending-exception slot before storing w0
+     * into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGet32InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 0 assembles the plain SET_VREG store below.
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm64/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     *
+     * Calls artGet64InstanceFromCode with w0 = field ref, w1 = object
+     * reference, x2 = referrer, x3 = self, then checks the thread's
+     * pending-exception slot before storing the 64-bit result x0 into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGet64InstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception (64-bit)
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpException            // bail out; tests the whole 64-bit value
+    SET_VREG_WIDE x0, w2                   // fp[A]/fp[A+1]<- x0
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from wINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm64/op_iget_object.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget-object, 0x54) calls artGetObjInstanceFromCode
+     * with w0 = field ref, w1 = object reference, x2 = referrer, x3 = self,
+     * then checks the thread's pending-exception slot before storing w0
+     * into vA with SET_VREG_OBJECT.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 1 assembles the SET_VREG_OBJECT store below.
+    .if 1
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm64/op_iget_boolean.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget-boolean, 0x55) calls
+     * artGetBooleanInstanceFromCode with w0 = field ref, w1 = object
+     * reference, x2 = referrer, x3 = self, then checks the thread's
+     * pending-exception slot before storing w0 into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 0 assembles the plain SET_VREG store below.
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm64/op_iget_byte.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget-byte, 0x56) calls artGetByteInstanceFromCode
+     * with w0 = field ref, w1 = object reference, x2 = referrer, x3 = self,
+     * then checks the thread's pending-exception slot before storing w0
+     * into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 0 assembles the plain SET_VREG store below.
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm64/op_iget_char.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget-char, 0x57) calls artGetCharInstanceFromCode
+     * with w0 = field ref, w1 = object reference, x2 = referrer, x3 = self,
+     * then checks the thread's pending-exception slot before storing w0
+     * into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 0 assembles the plain SET_VREG store below.
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm64/op_iget_short.S */
+/* File: arm64/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     *
+     * This expansion (iget-short, 0x58) calls artGetShortInstanceFromCode
+     * with w0 = field ref, w1 = object reference, x2 = referrer, x3 = self,
+     * then checks the thread's pending-exception slot before storing w0
+     * into vA.
+     */
+    EXPORT_PC
+    FETCH    w0, 1                         // w0<- field ref CCCC
+    lsr      w1, wINST, #12                // w1<- B
+    GET_VREG w1, w1                        // w1<- fp[B], the object pointer
+    ldr      x2, [xFP, #OFF_FP_METHOD]     // x2<- referrer
+    mov      x3, xSELF                     // x3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- pending exception
+    ubfx     w2, wINST, #8, #4             // w2<- A
+    PREFETCH_INST 2
+    cbnz     x3, MterpPossibleException    // bail out
+    // Template switch: 0 assembles the plain SET_VREG store below.
+    .if 0
+    SET_VREG_OBJECT w0, w2                 // fp[A]<- w0
+    .else
+    SET_VREG w0, w2                        // fp[A]<- w0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     // extract opcode from rINST
+    GOTO_OPCODE ip                         // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     *
+     * This expansion (iput, 0x59) calls artSet32InstanceFromMterp with
+     * w0 = field ref, w1 = object reference, w2 = value from vA,
+     * x3 = referrer.  A non-zero result in w0 takes the exception path.
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm64/op_iput_wide.S */
+    /*
+     * 64-bit instance field put.
+     *
+     * Calls artSet64InstanceFromMterp with w0 = field ref, w1 = object
+     * reference, x2 = address of vA (the value is passed by pointer, not
+     * loaded here), x3 = referrer.  A non-zero result in w0 takes the
+     * exception path.
+     */
+    /* iput-wide vA, vB, field//CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    add      x2, xFP, x2, lsl #2        // x2<- &fp[A] (xFP + A*4)
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm64/op_iput_object.S */
+    /*
+     * Object instance field put.
+     *
+     * Nothing is decoded inline; the handler passes the shadow frame
+     * address, the current PC, the raw instruction word and self to
+     * MterpIputObject and treats a zero result in w0 as an exception.
+     */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME    // x0<- xFP + OFF_FP_SHADOWFRAME
+    mov     x1, xPC                         // x1<- current dex PC pointer
+    mov     w2, wINST                       // w2<- instruction word
+    mov     x3, xSELF                       // x3<- self
+    bl      MterpIputObject
+    cbz     w0, MterpException              // zero result: take the exception path
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm64/op_iput_boolean.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     *
+     * This expansion (iput-boolean, 0x5c) calls artSet8InstanceFromMterp
+     * with w0 = field ref, w1 = object reference, w2 = value from vA,
+     * x3 = referrer.  A non-zero result in w0 takes the exception path.
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm64/op_iput_byte.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     *
+     * This expansion (iput-byte, 0x5d) calls artSet8InstanceFromMterp
+     * with w0 = field ref, w1 = object reference, w2 = value from vA,
+     * x3 = referrer.  A non-zero result in w0 takes the exception path.
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm64/op_iput_char.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     *
+     * This expansion (iput-char, 0x5e) calls artSet16InstanceFromMterp
+     * with w0 = field ref, w1 = object reference, w2 = value from vA,
+     * x3 = referrer.  A non-zero result in w0 takes the exception path.
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm64/op_iput_short.S */
+/* File: arm64/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     *
+     * This expansion (iput-short, 0x5f) calls artSet16InstanceFromMterp
+     * with w0 = field ref, w1 = object reference, w2 = value from vA,
+     * x3 = referrer.  A non-zero result in w0 takes the exception path.
+     */
+    /* op vA, vB, field//CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    w0, 1                      // w0<- field ref CCCC
+    lsr      w1, wINST, #12             // w1<- B
+    GET_VREG w1, w1                     // w1<- fp[B], the object pointer
+    ubfx     w2, wINST, #8, #4          // w2<- A
+    GET_VREG w2, w2                     // w2<- fp[A]
+    ldr      x3, [xFP, #OFF_FP_METHOD]  // x3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cbnz     w0, MterpPossibleException // non-zero result: take the exception path
+    ADVANCE  2                          // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget) calls artGet32StaticFromCode and stores the
+     * 32-bit result in w0 to vAA unless an exception is pending.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGet32StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    // (no sign/zero extension instruction in this variant)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 0" below assembles the plain SET_VREG arm (non-object variant).
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm64/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     * Calls artGet64StaticFromCode and stores the 64-bit result in x0 to
+     * the vAA register pair unless an exception is pending.
+     */
+    /* sget-wide vAA, field//BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGet64StaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w4, wINST, #8                 // w4<- AA
+    cbnz  x3, MterpException            // bail out
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    SET_VREG_WIDE x0, w4                // fp[AA]<- x0 (wide)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm64/op_sget_object.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget-object) calls artGetObjStaticFromCode and
+     * stores the result with SET_VREG_OBJECT unless an exception is pending.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGetObjStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    // (no sign/zero extension instruction in this variant)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 1" below assembles the SET_VREG_OBJECT arm (object variant).
+.if 1
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm64/op_sget_boolean.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget-boolean) calls artGetBooleanStaticFromCode and
+     * zero-extends the low byte of the result before storing it to vAA.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGetBooleanStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxtb w0, w0                         // w0<- zero-extended low 8 bits
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 0" below assembles the plain SET_VREG arm (non-object variant).
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm64/op_sget_byte.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget-byte) calls artGetByteStaticFromCode and
+     * sign-extends the low byte of the result before storing it to vAA.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGetByteStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxtb w0, w0                         // w0<- sign-extended low 8 bits
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 0" below assembles the plain SET_VREG arm (non-object variant).
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm64/op_sget_char.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget-char) calls artGetCharStaticFromCode and
+     * zero-extends the low halfword of the result before storing it to vAA.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGetCharStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    uxth w0, w0                         // w0<- zero-extended low 16 bits
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 0" below assembles the plain SET_VREG arm (non-object variant).
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm64/op_sget_short.S */
+/* File: arm64/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     *
+     * This instantiation (sget-short) calls artGetShortStaticFromCode and
+     * sign-extends the low halfword of the result before storing it to vAA.
+     */
+    /* op vAA, field//BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH w0, 1                         // w0<- field ref BBBB
+    ldr   x1, [xFP, #OFF_FP_METHOD]     // x1<- referrer
+    mov   x2, xSELF                     // x2<- self
+    bl    artGetShortStaticFromCode
+    ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]  // x3<- self's exception field
+    lsr   w2, wINST, #8                 // w2<- AA
+    sxth w0, w0                         // w0<- sign-extended low 16 bits
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    cbnz  x3, MterpException            // bail out
+    // The ".if 0" below assembles the plain SET_VREG arm (non-object variant).
+.if 0
+    SET_VREG_OBJECT w0, w2              // fp[AA]<- w0
+.else
+    SET_VREG w0, w2                     // fp[AA]<- w0
+.endif
+    ADVANCE 2                           // advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     *
+     * This instantiation (sput) calls artSet32StaticFromCode.
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- referrer
+    mov     x3, xSELF                   // x3<- self (AA index in w3 no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet32StaticFromCode      // args: w0 field ref, w1 value, x2 referrer, x3 self
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm64/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     * Passes the address of vAA (not its value) to
+     * artSet64IndirectStaticFromMterp.
+     */
+    /* sput-wide vAA, field//BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    ldr     x1, [xFP, #OFF_FP_METHOD]   // x1<- referrer
+    lsr     w2, wINST, #8               // w2<- AA
+    add     x2, xFP, w2, lsl #2         // x2<- &fp[AA] (xFP + AA*4)
+    mov     x3, xSELF                   // x3<- self
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet64IndirectStaticFromMterp
+    cbnz    w0, MterpException          // 0 on success, -1 on failure
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm64/op_sput_object.S */
+    /*
+     * sput-object handler wrapper: delegates the whole operation to
+     * MterpSputObject and treats a zero return value as failure.
+     */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME  // x0<- xFP + OFF_FP_SHADOWFRAME
+    mov     x1, xPC                     // x1<- current PC
+    mov     x2, xINST                   // x2<- current instruction word
+    mov     x3, xSELF                   // x3<- self
+    bl      MterpSputObject
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 2                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm64/op_sput_boolean.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     *
+     * This instantiation (sput-boolean) calls artSet8StaticFromCode.
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- referrer
+    mov     x3, xSELF                   // x3<- self (AA index in w3 no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet8StaticFromCode       // args: w0 field ref, w1 value, x2 referrer, x3 self
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm64/op_sput_byte.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     *
+     * This instantiation (sput-byte) calls artSet8StaticFromCode.
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- referrer
+    mov     x3, xSELF                   // x3<- self (AA index in w3 no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet8StaticFromCode       // args: w0 field ref, w1 value, x2 referrer, x3 self
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm64/op_sput_char.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     *
+     * This instantiation (sput-char) calls artSet16StaticFromCode.
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- referrer
+    mov     x3, xSELF                   // x3<- self (AA index in w3 no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet16StaticFromCode      // args: w0 field ref, w1 value, x2 referrer, x3 self
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm64/op_sput_short.S */
+/* File: arm64/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     *
+     * This instantiation (sput-short) calls artSet16StaticFromCode.
+     */
+    /* op vAA, field//BBBB */
+    EXPORT_PC
+    FETCH   w0, 1                       // w0<- field ref BBBB
+    lsr     w3, wINST, #8               // w3<- AA
+    GET_VREG w1, w3                     // w1<- fp[AA]
+    ldr     x2, [xFP, #OFF_FP_METHOD]   // x2<- referrer
+    mov     x3, xSELF                   // x3<- self (AA index in w3 no longer needed)
+    PREFETCH_INST 2                     // Get next inst, but don't advance xPC
+    bl      artSet16StaticFromCode      // args: w0 field ref, w1 value, x2 referrer, x3 self
+    cbnz    w0, MterpException          // 0 on success
+    ADVANCE 2                           // Past exception point - now advance xPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm64/op_invoke_virtual.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeVirtual; a zero return value takes
+     * the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeVirtual
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm64/op_invoke_super.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeSuper; a zero return value takes
+     * the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeSuper
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm64/op_invoke_direct.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeDirect; a zero return value takes
+     * the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeDirect
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm64/op_invoke_static.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeStatic; a zero return value takes
+     * the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeStatic
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm64/op_invoke_interface.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeInterface; a zero return value takes
+     * the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeInterface
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/op_return_void_no_barrier.S */
+    /*
+     * return-void-no-barrier: if either the suspend-request or the
+     * checkpoint-request bit is set in the thread flags, call
+     * MterpSuspendCheck(self) first; then branch to MterpReturn with x0 = 0.
+     */
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // w7<- thread flags
+    mov     x0, xSELF                   // x0<- self, argument for MterpSuspendCheck
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .Lop_return_void_no_barrier_check  // a request bit is set
+.Lop_return_void_no_barrier_return:
+    mov     x0, #0                      // x0<- 0 on entry to MterpReturn
+    b       MterpReturn
+.Lop_return_void_no_barrier_check:
+    bl      MterpSuspendCheck           // (self)
+    b       .Lop_return_void_no_barrier_return
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/op_invoke_virtual_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeVirtualRange; a zero return value
+     * takes the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeVirtualRange
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm64/op_invoke_super_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeSuperRange; a zero return value
+     * takes the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeSuperRange
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/op_invoke_direct_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeDirectRange; a zero return value
+     * takes the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeDirectRange
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm64/op_invoke_static_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeStaticRange; a zero return value
+     * takes the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeStaticRange
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/op_invoke_interface_range.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * Delegates the call to MterpInvokeInterfaceRange; a zero return value
+     * takes the exception path.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                     // x2<- current PC
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                   // x3<- current instruction word (unmasked)
+    bl      MterpInvokeInterfaceRange
+    cbz     w0, MterpException          // zero return: take the exception path
+    FETCH_ADVANCE_INST 3                // advance xPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm64/op_unused_79.S */
+/* File: arm64/unused.S */
+/*
+ * Unused opcode 0x79: bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm64/op_unused_7a.S */
+/* File: arm64/unused.S */
+/*
+ * Unused opcode 0x7a: bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm64/op_neg_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     *
+     * This instantiation (neg-int) computes 0 - vB.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op slot (empty for neg-int)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    sub     w0, wzr, w0                              // w0<- 0 - w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm64/op_not_int.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     *
+     * This instantiation (not-int) computes the bitwise complement of vB.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // optional op slot (empty for not-int)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    mvn     w0, w0                              // w0<- ~w0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm64/op_neg_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     *
+     * This instantiation (neg-long) computes 0 - vB.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    // (no preparatory instruction in this variant)
+    sub x0, xzr, x0                     // x0<- 0 - x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm64/op_not_long.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long
+     *
+     * This instantiation (not-long) computes the bitwise complement of vB.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    // (no preparatory instruction in this variant)
+    mvn     x0, x0                      // x0<- ~x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm64/op_neg_float.S */
+/* File: arm64/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op w0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     *
+     * This instantiation (neg-float) negates by toggling bit 31 of the raw
+     * 32-bit value in an integer register: adding 0x80000000 modulo 2^32
+     * flips bit 31 and leaves bits 0-30 unchanged.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    mov w4, #0x80000000                           // w4<- bit-31 mask (optional op slot)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    add     w0, w0, w4                              // w0<- w0 with bit 31 toggled
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm64/op_neg_double.S */
+/* File: arm64/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op x0".
+     *
+     * For: neg-long, not-long (and, in this instantiation, neg-double)
+     *
+     * neg-double negates by toggling bit 63 of the raw 64-bit value in an
+     * integer register: adding 0x8000000000000000 modulo 2^64 flips bit 63
+     * and leaves bits 0-62 unchanged.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    mov x1, #0x8000000000000000         // x1<- bit-63 mask
+    add     x0, x0, x1                  // x0<- x0 with bit 63 toggled
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // vA<- x0 (wide)
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm64/op_int_to_long.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "x0 = op w0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     *
+     * This instantiation (int-to-long) uses only integer registers: it
+     * sign-extends the low 32 bits of x0 to 64 bits.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3                     // w0<- vB
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    sbfm x0, x0, 0, 31                              // x0<- sign-extended bits 0-31 of x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm64/op_int_to_float.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op w0".
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3                     // w0<- vB
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf s0, w0                              // s0<- signed 32-bit int w0 converted to float
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm64/op_int_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op w0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG w0, w3                     // w0<- vB
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf d0, w0                              // d0<- signed 32-bit int w0 converted to double
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm64/op_long_to_int.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "w0 = op x0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     *
+     * This instantiation (long-to-int) uses only integer registers and has
+     * an empty "instr" line: the wide value is loaded into x0 and its low
+     * 32 bits (w0) are stored to vA.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+                                  // no op needed: w0 is the low half of x0
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm64/op_long_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op x0".
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     *
+     * This instantiation is long-to-float.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf s0, x0                              // s0<- signed 64-bit int x0 converted to float
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm64/op_long_to_double.S */
+/* File: arm64/funopWide.S */
+    /*
+     * Generic 64bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op x0".
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+
+    GET_VREG_WIDE x0, w3                // x0<- vB (wide)
+    FETCH_ADVANCE_INST 1                // advance xPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    scvtf d0, x0                              // d0<- signed 64-bit int x0 converted to double
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm64/op_float_to_int.S */
+/* File: arm64/funopNarrow.S */
+    /*
+     * float-to-int handler: the generic 32bit-to-32bit unary conversion
+     * template expanded with the "instr" line "fcvtzs w0, s0" ("w0 = op s0").
+     *
+     * For: int-to-float, float-to-int
+     * TODO: refactor all of the conversions - parameterize width and use same template.
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG s0, w3                     // s0<- vB (32-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs w0, s0                              // w0<- op s0 (float to signed int, round toward zero)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm64/op_float_to_long.S */
+/* File: arm64/funopWider.S */
+    /*
+     * float-to-long handler: the generic 32bit-to-64bit unary conversion
+     * template expanded with the "instr" line "fcvtzs x0, s0" ("x0 = op s0").
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG s0, w3                     // s0<- vB (32-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs x0, s0                              // x0<- op s0 (float to signed long, round toward zero)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm64/op_float_to_double.S */
+/* File: arm64/funopWider.S */
+    /*
+     * float-to-double handler: the generic 32bit-to-64bit unary conversion
+     * template expanded with the "instr" line "fcvt d0, s0" ("d0 = op s0").
+     *
+     * For: int-to-double, float-to-double, float-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG s0, w3                     // s0<- vB (32-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvt  d0, s0                              // d0<- op s0 (single to double precision)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE d0, w4           // vA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm64/op_double_to_int.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * double-to-int handler: the generic 64bit-to-32bit unary conversion
+     * template expanded with the "instr" line "fcvtzs w0, d0" ("w0 = op d0").
+     *
+     * For: 64bit-to-32bit conversions such as long-to-float, double-to-int,
+     *      double-to-float
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG_WIDE d0, w3                // d0<- vB (64-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs w0, d0                              // w0<- op d0 (double to signed int, round toward zero)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w4                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm64/op_double_to_long.S */
+/* File: arm64/funopWide.S */
+    /*
+     * double-to-long handler: the generic 64bit-to-64bit unary conversion
+     * template expanded with the "instr" line "fcvtzs x0, d0" ("x0 = op d0").
+     *
+     * For: long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG_WIDE d0, w3                // d0<- vB (64-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvtzs x0, d0                              // x0<- op d0 (double to signed long, round toward zero)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm64/op_double_to_float.S */
+/* File: arm64/funopNarrower.S */
+    /*
+     * double-to-float handler: the generic 64bit-to-32bit unary conversion
+     * template expanded with the "instr" line "fcvt s0, d0" ("s0 = op d0").
+     *
+     * For: 64bit-to-32bit conversions such as long-to-float, double-to-int,
+     *      double-to-float
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    lsr     w4, wINST, #8               // w4<- A+ (masked down to A below)
+    GET_VREG_WIDE d0, w3                // d0<- vB (64-bit source)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    and     w4, w4, #15                 // w4<- A
+    fcvt s0, d0                              // s0<- op d0 (double to single precision)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG s0, w4                // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm64/op_int_to_byte.S */
+/* File: arm64/unop.S */
+    /*
+     * int-to-byte handler: the generic 32-bit unary operation template
+     * expanded with the "instr" line "sxtb w0, w0" ("result = op w0").
+     * The template allows an optional pre-instruction; none is used here.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // (no optional pre-instruction for this op)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    sxtb    w0, w0                              // w0<- low byte of w0, sign-extended
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm64/op_int_to_char.S */
+/* File: arm64/unop.S */
+    /*
+     * int-to-char handler: the generic 32-bit unary operation template
+     * expanded with the "instr" line "uxth w0, w0" ("result = op w0").
+     * The template allows an optional pre-instruction; none is used here.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // (no optional pre-instruction for this op)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    uxth    w0, w0                              // w0<- low halfword of w0, zero-extended
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm64/op_int_to_short.S */
+/* File: arm64/unop.S */
+    /*
+     * int-to-short handler: the generic 32-bit unary operation template
+     * expanded with the "instr" line "sxth w0, w0" ("result = op w0").
+     * The template allows an optional pre-instruction; none is used here.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    GET_VREG w0, w3                     // w0<- vB
+    ubfx    w9, wINST, #8, #4           // w9<- A
+                               // (no optional pre-instruction for this op)
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    sxth    w0, w0                              // w0<- low halfword of w0, sign-extended
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                     // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm64/op_add_int.S */
+/* File: arm64/binop.S */
+    /*
+     * add-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "add w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    add     w0, w0, w1                              // w0<- vBB + vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm64/op_sub_int.S */
+/* File: arm64/binop.S */
+    /*
+     * sub-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "sub w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    sub     w0, w0, w1                              // w0<- vBB - vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm64/op_mul_int.S */
+/* Template note: must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal.  NOTE(review): this restriction looks inherited from 32-bit ARM; confirm whether it applies to A64. */
+/* File: arm64/binop.S */
+    /*
+     * mul-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "mul w0, w1, w0" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    mul     w0, w1, w0                              // w0<- vCC * vBB
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm64/op_div_int.S */
+/* File: arm64/binop.S */
+    /*
+     * div-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "sdiv w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 1 for this op, so the
+     * divide-by-zero check on vCC (w1) is assembled and branches to
+     * common_errDivideByZero when vCC is zero.  There is no explicit check
+     * for (INT_MIN / -1); the division is a single sdiv instruction.
+     * NOTE(review): A64 SDIV is expected to yield INT_MIN for that case
+     * without trapping -- confirm against the architecture reference.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    sdiv     w0, w0, w1                              // w0<- vBB / vCC (signed)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm64/op_rem_int.S */
+/* File: arm64/binop.S */
+    /*
+     * rem-int handler: the generic 32-bit binary operation template
+     * expanded with the pre-instruction "sdiv w2, w0, w1" and the "instr"
+     * line "msub w0, w2, w1, w0" (w0 holds vBB and w1 holds vCC).  Together
+     * they compute the remainder as vBB - (vBB / vCC) * vCC; the result
+     * stays in w0 and is stored to vAA.
+     *
+     * The template's "chkzero" option is 1 for this op, so the
+     * divide-by-zero check on vCC (w1) is assembled and branches to
+     * common_errDivideByZero when vCC is zero.  There is no explicit check
+     * for (INT_MIN / -1).
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 1
+    cbz     w1, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    sdiv     w2, w0, w1                           // w2<- vBB / vCC (quotient; BB index in w2 no longer needed)
+    msub w0, w2, w1, w0                              // w0<- vBB - quotient * vCC (remainder)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm64/op_and_int.S */
+/* File: arm64/binop.S */
+    /*
+     * and-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "and w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    and     w0, w0, w1                              // w0<- vBB & vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm64/op_or_int.S */
+/* File: arm64/binop.S */
+    /*
+     * or-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "orr w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    orr     w0, w0, w1                              // w0<- vBB | vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm64/op_xor_int.S */
+/* File: arm64/binop.S */
+    /*
+     * xor-int handler: the generic 32-bit binary operation template
+     * expanded with the "instr" line "eor w0, w0, w1" ("result = w0 op w1",
+     * where w0 holds vBB and w1 holds vCC).  The result stays in w0 and is
+     * stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+                               // (no optional pre-instruction for this op)
+    eor     w0, w0, w1                              // w0<- vBB ^ vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm64/op_shl_int.S */
+/* File: arm64/binop.S */
+    /*
+     * shl-int handler: the generic 32-bit binary operation template
+     * expanded with the pre-instruction "and w1, w1, #31" and the "instr"
+     * line "lsl w0, w0, w1" (w0 holds vBB and w1 holds vCC).  Only the low
+     * five bits of vCC are used as the shift count; the result stays in w0
+     * and is stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    and     w1, w1, #31                           // w1<- shift count (vCC & 31)
+    lsl     w0, w0, w1                              // w0<- vBB << shift count
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm64/op_shr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * shr-int handler: the generic 32-bit binary operation template
+     * expanded with the pre-instruction "and w1, w1, #31" and the "instr"
+     * line "asr w0, w0, w1" (w0 holds vBB and w1 holds vCC).  Only the low
+     * five bits of vCC are used as the shift count; the result stays in w0
+     * and is stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    and     w1, w1, #31                           // w1<- shift count (vCC & 31)
+    asr     w0, w0, w1                              // w0<- vBB >> shift count (arithmetic)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm64/op_ushr_int.S */
+/* File: arm64/binop.S */
+    /*
+     * ushr-int handler: the generic 32-bit binary operation template
+     * expanded with the pre-instruction "and w1, w1, #31" and the "instr"
+     * line "lsr w0, w0, w1" (w0 holds vBB and w1 holds vCC).  Only the low
+     * five bits of vCC are used as the shift count; the result stays in w0
+     * and is stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (w1) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and
+     * modulus.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w9, wINST, #8               // w9<- AA
+    lsr     w3, w0, #8                  // w3<- CC
+    and     w2, w0, #255                // w2<- BB
+    GET_VREG w1, w3                     // w1<- vCC
+    GET_VREG w0, w2                     // w0<- vBB
+    .if 0
+    cbz     w1, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    and     w1, w1, #31                           // w1<- shift count (vCC & 31)
+    lsr     w0, w0, w1                              // w0<- vBB >>> shift count (logical)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm64/op_add_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * add-long handler: the generic 64-bit binary operation template
+     * expanded with the "instr" line "add x0, x1, x2" ("result = x1 op x2",
+     * where x1 holds vBB and x2 holds vCC).  The result is produced in x0
+     * and stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (x2) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    
+    add x0, x1, x2                              // x0<- vBB + vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm64/op_sub_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * sub-long handler: the generic 64-bit binary operation template
+     * expanded with the "instr" line "sub x0, x1, x2" ("result = x1 op x2",
+     * where x1 holds vBB and x2 holds vCC).  The result is produced in x0
+     * and stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (x2) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    
+    sub x0, x1, x2                              // x0<- vBB - vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm64/op_mul_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * mul-long handler: the generic 64-bit binary operation template
+     * expanded with the "instr" line "mul x0, x1, x2" ("result = x1 op x2",
+     * where x1 holds vBB and x2 holds vCC).  The result is produced in x0
+     * and stored to vAA.
+     *
+     * The template's "chkzero" option is 0 for this op, so the
+     * divide-by-zero check on vCC (x2) is wrapped in ".if 0" and is not
+     * assembled.  That check is only useful for integer division and modulus.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (disabled by ".if 0")
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    
+    mul x0, x1, x2                              // x0<- vBB * vCC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm64/op_div_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * div-long vAA, vBB, vCC: vAA <- vBB / vCC (64-bit signed division).
+     *
+     * Expanded from the generic 64-bit binary operation template.  The
+     * template takes an "instr" line that performs "result = x1 op x2"
+     * (here "sdiv x0, x1, x2"); the result is stored to vAA from x0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC (x2).  It is 1 for this opcode: a zero divisor branches to
+     * common_errDivideByZero before rPC is advanced.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC (divisor)
+    GET_VREG_WIDE x1, w1               // x1<- vBB (dividend)
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sdiv x0, x1, x2                              // x0<- x1 / x2 (signed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm64/op_rem_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * rem-long vAA, vBB, vCC: vAA <- vBB % vCC (64-bit signed remainder).
+     *
+     * Expanded from the generic 64-bit binary operation template.  The
+     * template takes an "instr" line that performs "result = x1 op x2";
+     * here a pre-instruction computes the quotient ("sdiv x3, x1, x2") and
+     * the "instr" line ("msub x0, x3, x2, x1") derives the remainder from it.
+     * The result is stored to vAA from x0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC (x2).  It is 1 for this opcode: a zero divisor branches to
+     * common_errDivideByZero before rPC is advanced.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC (divisor)
+    GET_VREG_WIDE x1, w1               // x1<- vBB (dividend)
+    .if 1
+    cbz     x2, common_errDivideByZero  // is second operand zero?
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv x3, x1, x2                     // x3<- x1 / x2 (signed quotient)
+    msub x0, x3, x2, x1                              // x0<- x1 - (x3 * x2), i.e. the remainder
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm64/op_and_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * and-long vAA, vBB, vCC: vAA <- vBB & vCC (64-bit bitwise AND).
+     *
+     * Expanded from the generic 64-bit binary operation template.  The
+     * template takes an "instr" line that performs "result = x1 op x2"
+     * (here "and x0, x1, x2"); the result is stored to vAA from x0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC (x2).  It is 0 for this opcode, so the check is not assembled.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    and x0, x1, x2                              // x0<- x1 & x2 (vBB & vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm64/op_or_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * or-long vAA, vBB, vCC: vAA <- vBB | vCC (64-bit bitwise OR).
+     *
+     * Expanded from the generic 64-bit binary operation template.  The
+     * template takes an "instr" line that performs "result = x1 op x2"
+     * (here "orr x0, x1, x2"); the result is stored to vAA from x0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC (x2).  It is 0 for this opcode, so the check is not assembled.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    orr x0, x1, x2                              // x0<- x1 | x2 (vBB | vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm64/op_xor_long.S */
+/* File: arm64/binopWide.S */
+    /*
+     * xor-long vAA, vBB, vCC: vAA <- vBB ^ vCC (64-bit bitwise XOR).
+     *
+     * Expanded from the generic 64-bit binary operation template.  The
+     * template takes an "instr" line that performs "result = x1 op x2"
+     * (here "eor x0, x1, x2"); the result is stored to vAA from x0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC (x2).  It is 0 for this opcode, so the check is not assembled.
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x2, w2               // x2<- vCC
+    GET_VREG_WIDE x1, w1               // x1<- vBB
+    .if 0
+    cbz     x2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    eor x0, x1, x2                              // x0<- x1 ^ x2 (vBB ^ vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w4           // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm64/op_shl_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * shl-long vAA, vBB, vCC: vAA <- vBB << (vCC & 63).
+     *
+     * 64-bit shift operation.  vBB is a 64-bit value; the shift count vCC is
+     * read as a 32-bit register and masked to its low 6 bits before use.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // x2<- shift count & 63 (low 6 bits)
+    lsl  x0, x1, x2                 // x0<- x1 << x2 (left shift)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm64/op_shr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * shr-long vAA, vBB, vCC: vAA <- vBB >> (vCC & 63), arithmetic shift.
+     *
+     * 64-bit shift operation.  vBB is a 64-bit value; the shift count vCC is
+     * read as a 32-bit register and masked to its low 6 bits before use.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // x2<- shift count & 63 (low 6 bits)
+    asr  x0, x1, x2                 // x0<- x1 >> x2 (arithmetic, sign bit replicated)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm64/op_ushr_long.S */
+/* File: arm64/shiftWide.S */
+    /*
+     * ushr-long vAA, vBB, vCC: vAA <- vBB >>> (vCC & 63), logical shift.
+     *
+     * 64-bit shift operation.  vBB is a 64-bit value; the shift count vCC is
+     * read as a 32-bit register and masked to its low 6 bits before use.
+     *
+     * For: shl-long, shr-long, ushr-long
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr      w3, wINST, #8               // w3<- AA
+    lsr      w2, w0, #8                  // w2<- CC
+    GET_VREG w2, w2                     // w2<- vCC (shift count)
+    and      w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE x1, w1                // x1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and      x2, x2, #63                 // x2<- shift count & 63 (low 6 bits)
+    lsr  x0, x1, x2                 // x0<- x1 >>> x2 (logical, zero-filled)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w3                // vAA<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm64/op_add_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * add-float vAA, vBB, vCC: vAA <- vBB + vCC (single precision).
+     *
+     * Generic 32-bit floating-point operation.  Operands are loaded into
+     * s0 (vBB) and s1 (vCC); the result is stored to vAA from s0.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    fadd   s0, s0, s1                              // s0<- s0 + s1
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm64/op_sub_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * sub-float vAA, vBB, vCC: vAA <- vBB - vCC (single precision).
+     *
+     * Generic 32-bit floating-point operation.  Operands are loaded into
+     * s0 (vBB) and s1 (vCC); the result is stored to vAA from s0.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    fsub   s0, s0, s1                              // s0<- s0 - s1
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm64/op_mul_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * mul-float vAA, vBB, vCC: vAA <- vBB * vCC (single precision).
+     *
+     * Generic 32-bit floating-point operation.  Operands are loaded into
+     * s0 (vBB) and s1 (vCC); the result is stored to vAA from s0.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    fmul   s0, s0, s1                              // s0<- s0 * s1
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm64/op_div_float.S */
+/* File: arm64/fbinop.S */
+    /*
+     * div-float vAA, vBB, vCC: vAA <- vBB / vCC (single precision).
+     *
+     * Generic 32-bit floating-point operation.  Operands are loaded into
+     * s0 (vBB) and s1 (vCC); the result is stored to vAA from s0.  No
+     * divide-by-zero check is emitted for this handler.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    fdiv   s0, s0, s1                              // s0<- s0 / s1
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm64/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm64/fbinop.S */
+    /*
+     * rem-float vAA, vBB, vCC: vAA <- fmodf(vBB, vCC) (single precision).
+     *
+     * Generic 32-bit floating-point operation.  For this opcode the
+     * operation slot is a call ("bl fmodf") instead of a single arithmetic
+     * instruction: s0 (vBB) and s1 (vCC) are the arguments and the result is
+     * taken from s0.
+     *
+     * NOTE(review): wINST is read after the call, so it has to survive
+     * fmodf -- presumably it is mapped to a callee-saved register; confirm
+     * against the register definitions at the top of the file.
+     *
+     * For: add-float, sub-float, mul-float, div-float, rem-float
+     * form: <op> s0, s0, s1
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w1, w0, #8                  // w1<- CC
+    and     w0, w0, #255                // w0<- BB
+    GET_VREG  s1, w1                    // s1<- vCC
+    GET_VREG  s0, w0                    // s0<- vBB
+    bl      fmodf                              // s0<- fmodf(s0, s1)
+    lsr     w1, wINST, #8               // w1<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG  s0, w1                    // vAA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm64/op_add_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * add-double vAA, vBB, vCC: vAA <- vBB + vCC (double precision).
+     *
+     * Expanded from the generic 64-bit binary operation template with the
+     * floating-point registers d1/d2/d0 substituted for x1/x2/x0.  The
+     * "instr" line here is "fadd d0, d1, d2"; the result is stored from d0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC.  It is 0 for this opcode, so the check is never assembled (as
+     * expanded it names d2, and cbz only accepts a general-purpose register,
+     * so it could not be enabled in this form).
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fadd d0, d1, d2                              // d0<- d1 + d2 (vBB + vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm64/op_sub_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * sub-double vAA, vBB, vCC: vAA <- vBB - vCC (double precision).
+     *
+     * Expanded from the generic 64-bit binary operation template with the
+     * floating-point registers d1/d2/d0 substituted for x1/x2/x0.  The
+     * "instr" line here is "fsub d0, d1, d2"; the result is stored from d0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC.  It is 0 for this opcode, so the check is never assembled (as
+     * expanded it names d2, and cbz only accepts a general-purpose register,
+     * so it could not be enabled in this form).
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fsub d0, d1, d2                              // d0<- d1 - d2 (vBB - vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm64/op_mul_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * mul-double vAA, vBB, vCC: vAA <- vBB * vCC (double precision).
+     *
+     * Expanded from the generic 64-bit binary operation template with the
+     * floating-point registers d1/d2/d0 substituted for x1/x2/x0.  The
+     * "instr" line here is "fmul d0, d1, d2"; the result is stored from d0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC.  It is 0 for this opcode, so the check is never assembled (as
+     * expanded it names d2, and cbz only accepts a general-purpose register,
+     * so it could not be enabled in this form).
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fmul d0, d1, d2                              // d0<- d1 * d2 (vBB * vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm64/op_div_double.S */
+/* File: arm64/binopWide.S */
+    /*
+     * div-double vAA, vBB, vCC: vAA <- vBB / vCC (double precision).
+     *
+     * Expanded from the generic 64-bit binary operation template with the
+     * floating-point registers d1/d2/d0 substituted for x1/x2/x0.  The
+     * "instr" line here is "fdiv d0, d1, d2"; the result is stored from d0.
+     *
+     * If "chkzero" is set to 1, the template performs a divide-by-zero check
+     * on vCC.  It is 0 for this opcode (unlike div-long), so no zero check is
+     * assembled (as expanded it names d2, and cbz only accepts a
+     * general-purpose register, so it could not be enabled in this form).
+     *
+     * For: add-long, sub-long, mul-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double, rem-double
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d2, w2               // d2<- vCC
+    GET_VREG_WIDE d1, w1               // d1<- vBB
+    .if 0
+    cbz     d2, common_errDivideByZero  // is second operand zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    fdiv d0, d1, d2                              // d0<- d1 / d2 (vBB / vCC)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4           // vAA<- d0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm64/op_rem_double.S */
+    /*
+     * rem-double vAA, vBB, vCC: vAA <- fmod(vBB, vCC) (double precision).
+     *
+     * Hand-written handler (not expanded from the wide binop template): the
+     * operands are loaded into d0 (vBB) and d1 (vCC), fmod is called, and
+     * the result is stored to vAA from d0.
+     *
+     * NOTE(review): wINST is read after the call, so it has to survive
+     * fmod -- presumably it is mapped to a callee-saved register; confirm
+     * against the register definitions at the top of the file.
+     */
+    /* rem vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_WIDE d1, w2                // d1<- vCC
+    GET_VREG_WIDE d0, w1                // d0<- vBB
+    bl  fmod                            // d0<- fmod(d0, d1)
+    lsr     w4, wINST, #8               // w4<- AA
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w4                // vAA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 11-14 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/op_add_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * add-int/2addr vA, vB: vA <- vA + vB (32-bit).
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1"
+     * (here "add w0, w0, w1").
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.  It is 0 here, so
+     * the check is not assembled.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero  // is vB zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // (no pre-instruction for this opcode)
+    add     w0, w0, w1                              // w0<- w0 + w1 (vA + vB)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/op_sub_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * sub-int/2addr vA, vB: vA <- vA - vB (32-bit).
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1"
+     * (here "sub w0, w0, w1").
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.  It is 0 here, so
+     * the check is not assembled.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero  // is vB zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // (no pre-instruction for this opcode)
+    sub     w0, w0, w1                              // w0<- w0 - w1 (vA - vB)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/op_mul_int_2addr.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal */
+/* NOTE(review): the operand-order restriction above looks inherited from the
+   32-bit ARM template; confirm whether it still applies on AArch64. */
+/* File: arm64/binop2addr.S */
+    /*
+     * mul-int/2addr vA, vB: vA <- vA * vB (32-bit).
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1"
+     * (here "mul w0, w1, w0").
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.  It is 0 here, so
+     * the check is not assembled.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero  // is vB zero? (not assembled: .if 0)
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // (no pre-instruction for this opcode)
+    mul     w0, w1, w0                              // w0<- w1 * w0 (vB * vA)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/op_div_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * div-int/2addr vA, vB: vA <- vA / vB (32-bit signed division).
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1"
+     * (here "sdiv w0, w0, w1").
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.  It is 1 here: a
+     * zero divisor branches to common_errDivideByZero before rPC advances.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB (divisor)
+    GET_VREG w0, w9                     // w0<- vA (dividend)
+    .if 1
+    cbz     w1, common_errDivideByZero  // is vB zero?
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // (no pre-instruction for this opcode)
+    sdiv     w0, w0, w1                              // w0<- w0 / w1 (signed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/op_rem_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * rem-int/2addr vA, vB: vA <- vA % vB (32-bit signed remainder).
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * Here a pre-instruction computes the quotient ("sdiv w2, w0, w1") and
+     * the "instr" line ("msub w0, w2, w1, w0") derives the remainder from it.
+     * (If the result comes back in a register other than w0, you can
+     * override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.  It is 1 here: a
+     * zero divisor branches to common_errDivideByZero before rPC advances.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB (divisor)
+    GET_VREG w0, w9                     // w0<- vA (dividend)
+    .if 1
+    cbz     w1, common_errDivideByZero  // is vB zero?
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv     w2, w0, w1                           // w2<- w0 / w1 (signed quotient)
+    msub w0, w2, w1, w0                              // w0<- w0 - (w2 * w1), i.e. the remainder
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/op_and_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * and-int/2addr vA, vB: vA <- vA & vB (32-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional pre-op slot: unused for this opcode
+    and     w0, w0, w1                              // w0<- vA & vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/op_or_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * or-int/2addr vA, vB: vA <- vA | vB (32-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional pre-op slot: unused for this opcode
+    orr     w0, w0, w1                              // w0<- vA | vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/op_xor_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * xor-int/2addr vA, vB: vA <- vA ^ vB (32-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+                               // optional pre-op slot: unused for this opcode
+    eor     w0, w0, w1                              // w0<- vA ^ vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/op_shl_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * shl-int/2addr vA, vB: vA <- vA << (vB & 31) (32-bit).
+     *
+     * No explicit "and w1, w1, #31" is emitted: the register form of lsl
+     * (LSLV) already uses the shift amount modulo the register width (32),
+     * so only the low 5 bits of w1 take part in the shift.
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB (shift count)
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsl     w0, w0, w1                              // w0<- vA << (vB mod 32)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/op_shr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * shr-int/2addr vA, vB: vA <- vA >> (vB & 31) (32-bit, arithmetic).
+     *
+     * No explicit "and w1, w1, #31" is emitted: the register form of asr
+     * (ASRV) already uses the shift amount modulo the register width (32),
+     * so only the low 5 bits of w1 take part in the shift.
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB (shift count)
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    asr     w0, w0, w1                              // w0<- vA >> (vB mod 32), sign-filling
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/op_ushr_int_2addr.S */
+/* File: arm64/binop2addr.S */
+    /*
+     * ushr-int/2addr vA, vB: vA <- vA >>> (vB & 31) (32-bit, logical).
+     *
+     * No explicit "and w1, w1, #31" is emitted: the register form of lsr
+     * (LSRV) already uses the shift amount modulo the register width (32),
+     * so only the low 5 bits of w1 take part in the shift.
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binop2addr.S):
+     *
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A
+    GET_VREG w1, w3                     // w1<- vB (shift count)
+    GET_VREG w0, w9                     // w0<- vA
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsr     w0, w0, w1                              // w0<- vA >>> (vB mod 32), zero-filling
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm64/op_add_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * add-long/2addr vA, vB: vA <- vA + vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    add     x0, x0, x1                              // x0<- vA + vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/op_sub_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * sub-long/2addr vA, vB: vA <- vA - vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sub     x0, x0, x1                              // x0<- vA - vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/op_mul_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * mul-long/2addr vA, vB: vA <- vA * vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    mul     x0, x0, x1                              // x0<- vA * vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm64/op_div_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * div-long/2addr vA, vB: vA <- vA / vB (64-bit signed, via sdiv).
+     * Branches to common_errDivideByZero when vB is zero.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero  // chkzero=1: vB == 0 is an error
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    sdiv     x0, x0, x1                              // x0<- vA / vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/op_rem_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * rem-long/2addr vA, vB: vA <- vA % vB (64-bit signed).
+     * Branches to common_errDivideByZero when vB is zero; otherwise the
+     * remainder is formed as vA - (vA / vB) * vB with sdiv + msub, using
+     * x3 as the scratch register for the quotient.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      rem-long/2addr, and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 1
+    cbz     x1, common_errDivideByZero  // chkzero=1: vB == 0 is an error
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    sdiv x3, x0, x1                     // x3<- vA / vB (quotient)
+    msub x0, x3, x1, x0                              // x0<- vA - x3 * vB (remainder)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/op_and_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * and-long/2addr vA, vB: vA <- vA & vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    and     x0, x0, x1                              // x0<- vA & vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/op_or_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * or-long/2addr vA, vB: vA <- vA | vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    orr     x0, x0, x1                              // x0<- vA | vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/op_xor_long_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * xor-long/2addr vA, vB: vA <- vA ^ vB (64-bit).
+     * chkzero is 0 here, so the divide-by-zero branch is not assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (x1).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE x1, w1               // x1<- vB
+    GET_VREG_WIDE x0, w2               // x0<- vA
+    .if 0
+    cbz     x1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    eor     x0, x0, x1                              // x0<- vA ^ vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/op_shl_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     *
+     * shl-long/2addr vA, vB: vA (64-bit) <- vA << (vB & 63), where vB is a
+     * 32-bit shift count.  No explicit "and x1, x1, #63" is emitted: the
+     * register form of lsl (LSLV) already uses the shift amount modulo the
+     * register width (64), so only the low 6 bits of x1 are used.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsl x0, x0, x1                      // x0<- vA << (vB mod 64)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/op_shr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     *
+     * shr-long/2addr vA, vB: vA (64-bit) <- vA >> (vB & 63), arithmetic,
+     * where vB is a 32-bit shift count.  No explicit "and x1, x1, #63" is
+     * emitted: the register form of asr (ASRV) already uses the shift amount
+     * modulo the register width (64), so only the low 6 bits of x1 are used.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    asr x0, x0, x1                      // x0<- vA >> (vB mod 64), sign-filling
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/op_ushr_long_2addr.S */
+/* File: arm64/shiftWide2addr.S */
+    /*
+     * Generic 64-bit shift operation.
+     *
+     * ushr-long/2addr vA, vB: vA (64-bit) <- vA >>> (vB & 63), logical,
+     * where vB is a 32-bit shift count.  No explicit "and x1, x1, #63" is
+     * emitted: the register form of lsr (LSRV) already uses the shift amount
+     * modulo the register width (64), so only the low 6 bits of x1 are used.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG w1, w1                     // w1<- vB (shift count)
+    GET_VREG_WIDE x0, w2                // x0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    lsr x0, x0, x1                      // x0<- vA >>> (vB mod 64), zero-filling
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE x0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/op_add_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * add-float/2addr vA, vB: vA <- vA + vB (single precision).
+     *
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (bits 8-11 of the instruction)
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    fadd   s2, s0, s1                              // s2<- vA + vB
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9                     // vA<- s2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/op_sub_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * sub-float/2addr vA, vB: vA <- vA - vB (single precision).
+     *
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (bits 8-11 of the instruction)
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    fsub   s2, s0, s1                              // s2<- vA - vB
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9                     // vA<- s2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/op_mul_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * mul-float/2addr vA, vB: vA <- vA * vB (single precision).
+     *
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (bits 8-11 of the instruction)
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    fmul   s2, s0, s1                              // s2<- vA * vB
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9                     // vA<- s2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/op_div_float_2addr.S */
+/* File: arm64/fbinop2addr.S */
+    /*
+     * div-float/2addr vA, vB: vA <- vA / vB (single precision).
+     * There is no divide-by-zero branch in this handler.
+     *
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (bits 8-11 of the instruction)
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    fdiv   s2, s0, s1                              // s2<- vA / vB
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s2, w9                     // vA<- s2
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm64/op_rem_float_2addr.S */
+    /*
+     * rem-float/2addr vA, vB: vA <- fmodf(vA, vB).
+     * The operands are loaded into s0/s1, fmodf is called, and the value
+     * left in s0 is stored back to vA.
+     */
+    /* rem vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w9, wINST, #8, #4           // w9<- A (bits 8-11 of the instruction)
+    GET_VREG s1, w3                     // s1<- vB
+    GET_VREG s0, w9                     // s0<- vA
+    bl  fmodf                           // s0<- fmodf(s0, s1)
+    ubfx    w9, wINST, #8, #4           // w9<- A again; w9 is not assumed to survive the call
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG s0, w9                     // vA<- s0
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm64/op_add_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * add-double/2addr vA, vB: vA <- vA + vB (double precision).
+     * This expansion uses d0/d1 where the template text below says x0/x1.
+     * chkzero is 0 here, so the "cbz d1" line inside ".if 0" is never
+     * assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB.  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fadd     d0, d0, d1                              // d0<- vA + vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/op_sub_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * sub-double/2addr vA, vB: vA <- vA - vB (double precision).
+     * This expansion uses d0/d1 where the template text below says x0/x1.
+     * chkzero is 0 here, so the "cbz d1" line inside ".if 0" is never
+     * assembled.
+     *
+     * Template notes (arm64/binopWide2addr.S):
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB.  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fsub     d0, d0, d1                              // d0<- vA - vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/op_mul_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * mul-double/2addr (opcode 0xcd): vA <- vA * vB.  This instantiation
+     * keeps its operands in d0/d1 rather than the x0/x1 named by the
+     * generic template text below.
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB.  Useful for integer division and modulus.  It is 0 for this
+     * opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B (register index)
+    ubfx    w2, wINST, #8, #4           // w2<- A (register index)
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fmul     d0, d0, d1                              // d0<- vA * vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm64/op_div_double_2addr.S */
+/* File: arm64/binopWide2addr.S */
+    /*
+     * div-double/2addr (opcode 0xce): vA <- vA / vB.  This instantiation
+     * keeps its operands in d0/d1 rather than the x0/x1 named by the
+     * generic template text below.
+     *
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "x0 = x0 op x1".
+     * This must not be a function call, as we keep w2 live across it.
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB.  Useful for integer division and modulus.  It is 0 for this
+     * opcode (floating-point divide), so the cbz inside ".if 0" below is
+     * never assembled.
+     *
+     * For: add-long/2addr, sub-long/2addr, mul-long/2addr, div-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr,
+     *      shl-long/2addr, shr-long/2addr, ushr-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr, rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B (register index)
+    ubfx    w2, wINST, #8, #4           // w2<- A (register index)
+    GET_VREG_WIDE d1, w1               // d1<- vB
+    GET_VREG_WIDE d0, w2               // d0<- vA
+    .if 0
+    cbz     d1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    
+    fdiv     d0, d0, d1                              // d0<- vA / vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2               // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/op_rem_double_2addr.S */
+    /*
+     * rem-double/2addr (opcode 0xcf): vA <- fmod(vA, vB).
+     *
+     * The remainder is computed by calling fmod, so w2 (the A index) does
+     * not survive the call and is re-extracted from wINST afterwards.
+     * wINST must therefore still hold THIS instruction at that point:
+     * FETCH_ADVANCE_INST, which reloads rINST with the next instruction,
+     * is deliberately issued only after A has been re-extracted.
+     */
+    /* rem vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_WIDE d1, w1                // d1<- vB
+    GET_VREG_WIDE d0, w2                // d0<- vA
+    bl fmod                             // d0<- fmod(vA, vB)
+    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST (after A is reloaded)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_WIDE d0, w2                // vA<- result
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/op_add_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * add-int/lit16 (opcode 0xd0): vA <- vB + CCCC, where CCCC is the
+     * sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    add     w0, w0, w1                              // w0<- vB + literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm64/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm64/binopLit16.S */
+    /*
+     * rsub-int (opcode 0xd1): vA <- CCCC - vB (reverse subtract), where
+     * CCCC is the sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sub     w0, w1, w0                              // w0<- literal - vB (operands reversed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/op_mul_int_lit16.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal (NOTE(review): confirm this operand-order restriction applies on arm64) */
+/* File: arm64/binopLit16.S */
+    /*
+     * mul-int/lit16 (opcode 0xd2): vA <- vB * CCCC, where CCCC is the
+     * sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    mul     w0, w1, w0                              // w0<- literal * vB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/op_div_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * div-int/lit16 (opcode 0xd3): vA <- vB / CCCC (signed), where CCCC
+     * is the sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 1 for this opcode: a zero literal branches to common_errDivideByZero
+     * before rPC is advanced.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    sdiv w0, w0, w1                              // w0<- vB / literal (signed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/op_rem_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * rem-int/lit16 (opcode 0xd4): vA <- vB % CCCC (signed), where CCCC
+     * is the sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     * The remainder is formed as vB - (vB / CCCC) * CCCC using an
+     * sdiv into w3 followed by msub.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 1 for this opcode: a zero literal branches to common_errDivideByZero
+     * before rPC is advanced.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv w3, w0, w1
+    msub w0, w3, w1, w0                              // w0<- vB - w3 * literal (remainder); w3 clobbered
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/op_and_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * and-int/lit16 (opcode 0xd5): vA <- vB & CCCC, where CCCC is the
+     * sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    and     w0, w0, w1                              // w0<- vB & literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/op_or_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * or-int/lit16 (opcode 0xd6): vA <- vB | CCCC, where CCCC is the
+     * sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    orr     w0, w0, w1                              // w0<- vB | literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/op_xor_int_lit16.S */
+/* File: arm64/binopLit16.S */
+    /*
+     * xor-int/lit16 (opcode 0xd7): vA <- vB ^ CCCC, where CCCC is the
+     * sign-extended 16-bit literal loaded into w1 by FETCH_S.
+     *
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal (w1).  Useful for integer division and modulus.  It is
+     * 0 for this opcode, so the cbz inside ".if 0" below is never assembled.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S w1, 1                       // w1<- ssssCCCC (sign-extended)
+    lsr     w2, wINST, #12              // w2<- B (register index)
+    ubfx    w9, wINST, #8, #4           // w9<- A (register index)
+    GET_VREG w0, w2                     // w0<- vB
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    eor     w0, w0, w1                              // w0<- vB ^ literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/op_add_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * add-int/lit8 (opcode 0xd8): vAA <- vBB + CC, where CC is the
+     * sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    add     w0, w0, w1                              // w0<- vBB + literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/op_rsub_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * rsub-int/lit8 (opcode 0xd9): vAA <- CC - vBB (reverse subtract),
+     * where CC is the sign-extended 8-bit literal taken from the high
+     * byte of the value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    sub     w0, w1, w0                              // w0<- literal - vBB (operands reversed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm64/op_mul_int_lit8.S */
+/* must be "mul w0, w1, w0" -- "w0, w0, w1" is illegal (NOTE(review): confirm this operand-order restriction applies on arm64) */
+/* File: arm64/binopLit8.S */
+    /*
+     * mul-int/lit8 (opcode 0xda): vAA <- vBB * CC, where CC is the
+     * sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    mul     w0, w1, w0                              // w0<- literal * vBB
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm64/op_div_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * div-int/lit8 (opcode 0xdb): vAA <- vBB / CC (signed), where CC is
+     * the sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 1 for this opcode: a zero literal branches to
+     * common_errDivideByZero before rPC is advanced.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    sdiv     w0, w0, w1                              // w0<- vBB / literal (signed)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/op_rem_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * rem-int/lit8 (opcode 0xdc): vAA <- vBB % CC (signed), where CC is
+     * the sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.  The remainder is formed as
+     * vBB - (vBB / CC) * CC using an sdiv into w3 followed by msub.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 1 for this opcode: a zero literal branches to
+     * common_errDivideByZero before rPC is advanced.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 1
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    sdiv w3, w0, w1                           // w3<- vBB / literal (signed quotient)
+    msub w0, w3, w1, w0                              // w0<- vBB - w3 * literal (remainder)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm64/op_and_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * and-int/lit8 (opcode 0xdd): vAA <- vBB & CC, where CC is the
+     * sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    and     w0, w0, w1                              // w0<- vBB & literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm64/op_or_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * or-int/lit8 (opcode 0xde): vAA <- vBB | CC, where CC is the
+     * sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    orr     w0, w0, w1                              // w0<- vBB | literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/op_xor_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * xor-int/lit8 (opcode 0xdf): vAA <- vBB ^ CC, where CC is the
+     * sign-extended 8-bit literal taken from the high byte of the
+     * value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+                               // optional pre-op slot; empty for this opcode
+    eor     w0, w0, w1                              // w0<- vBB ^ literal
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/op_shl_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * shl-int/lit8 (opcode 0xe0): vAA <- vBB << (CC & 31), where CC is
+     * the 8-bit literal taken from the high byte of the value loaded
+     * into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // w1<- shift count (low 5 bits of CC)
+    lsl     w0, w0, w1                              // w0<- vBB << shift count
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/op_shr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * shr-int/lit8 (opcode 0xe1): vAA <- vBB >> (CC & 31) as an
+     * arithmetic (sign-propagating) shift, where CC is the 8-bit literal
+     * taken from the high byte of the value loaded into w3 by FETCH_S.
+     *
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CC (w1).  Useful for integer division and modulus.  It
+     * is 0 for this opcode, so the cbz inside ".if 0" below is never
+     * assembled.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA (register index)
+    and     w2, w3, #255                // w2<- BB (register index)
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    and     w1, w1, #31                           // w1<- shift count (low 5 bits of CC)
+    asr     w0, w0, w1                              // w0<- vBB >> shift count (arithmetic)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/op_ushr_int_lit8.S */
+/* File: arm64/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = w0 op w1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than w0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (w1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     *
+     * This expansion is ushr-int/lit8: vAA <- vBB >>> (CC & 31) using a
+     * logical shift ("lsr").  The divide-by-zero check is assembled out
+     * (".if 0").
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
+    lsr     w9, wINST, #8               // w9<- AA
+    and     w2, w3, #255                // w2<- BB
+    GET_VREG w0, w2                     // w0<- vBB
+    asr    w1, w3, #8                   // w1<- ssssssCC (sign extended)
+    /* chkzero is 0 for this opcode, so the check below is never assembled. */
+    .if 0
+    cbz     w1, common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    /* NOTE(review): AArch64 variable shifts are documented to use the count
+       modulo the register width, so this mask may be redundant - confirm. */
+    and     w1, w1, #31                           // w1<- CC & 31 (shift count)
+    lsr     w0, w0, w1                              // w0<- vBB >>> w1 (logical)
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG w0, w9                // vAA<- w0
+    GOTO_OPCODE ip                      // jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm64/op_iget_quick.S */
+    /*
+     * Quickened instance-field read: vA <- field at byte offset CCCC of the
+     * object referenced by vB.  This expansion uses "ldr" (32-bit load).
+     * A null object reference branches to common_errNullObject.
+     */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null (64-bit view of the register filled via w3)
+    beq     common_errNullObject        // object was null
+    ldr   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/op_iget_wide_quick.S */
+    /*
+     * Quickened 64-bit instance-field read: the register pair starting at
+     * vA <- 64-bit field at byte offset CCCC of the object referenced by vB.
+     * A null object reference branches to common_errNullObject.
+     */
+    /* iget-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w4, 1                         // w4<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cbz     w3, common_errNullObject        // object was null
+    add     x4, x3, x4                  // create direct pointer
+    ldr     x0, [x4]                    // x0<- 64-bit obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    SET_VREG_WIDE x0, w2                // fp[A]<- x0 (wide)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/op_iget_object_quick.S */
+    /*
+     * Quickened object-field read.  The load itself is delegated to the
+     * helper artIGetObjectFromMterp(obj, offset); afterwards the thread's
+     * pending-exception slot is checked and, if it is non-null, control
+     * goes to MterpPossibleException instead of storing the result.
+     */
+    /* For: iget-object-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    EXPORT_PC
+    GET_VREG w0, w2                     // w0<- object we're operating on
+    bl      artIGetObjectFromMterp      // (obj, offset)
+    ldr     x3, [xSELF, #THREAD_EXCEPTION_OFFSET]   // x3<- pending exception (64-bit pointer)
+    ubfx    w2, wINST, #8, #4           // w2<- A (recomputed after the call)
+    PREFETCH_INST 2
+    // Test the whole 64-bit pointer that was loaded; testing only w3 would
+    // ignore the upper 32 bits of the exception slot.
+    cbnz    x3, MterpPossibleException      // bail out
+    SET_VREG_OBJECT w0, w2              // fp[A]<- w0
+    ADVANCE 2                           // advance rPC
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm64/op_iput_quick.S */
+    /*
+     * Quickened instance-field write: field at byte offset CCCC of the
+     * object referenced by vB <- vA.  This expansion uses "str" (32-bit
+     * store).  A null object reference branches to common_errNullObject.
+     */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    // cbz tests the register directly, so no preceding cmp is required.
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    str     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/op_iput_wide_quick.S */
+    /*
+     * Quickened 64-bit instance-field write: 64-bit field at byte offset
+     * CCCC of the object referenced by vB <- register pair starting at vA.
+     * A null object reference branches to common_errNullObject.
+     */
+    /* iput-wide-quick vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w3, 1                         // w3<- field byte offset
+    GET_VREG w2, w2                     // w2<- fp[B], the object pointer
+    ubfx    w0, wINST, #8, #4           // w0<- A
+    cmp     w2, #0                      // check object for null
+    beq     common_errNullObject        // object was null
+    GET_VREG_WIDE x0, w0                // x0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load wINST
+    add     x1, x2, x3                  // create a direct pointer
+    str     x0, [x1]                    // obj.field<- x0 (64-bit)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/op_iput_object_quick.S */
+    /*
+     * iput-object-quick: the store is delegated to the helper
+     * MterpIputObjectQuick, called with (xFP + OFF_FP_SHADOWFRAME, xPC,
+     * wINST).  A zero return value in w0 branches to MterpException.
+     */
+    EXPORT_PC
+    add     x0, xFP, #OFF_FP_SHADOWFRAME    // x0<- xFP + OFF_FP_SHADOWFRAME
+    mov     x1, xPC                         // x1<- xPC
+    mov     w2, wINST                       // w2<- wINST
+    bl      MterpIputObjectQuick
+    cbz     w0, MterpException              // helper returned 0
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/op_invoke_virtual_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * This expansion calls MterpInvokeVirtualQuick with
+     * (xSELF, xFP + OFF_FP_SHADOWFRAME, xPC, xINST).  A zero return value
+     * in w0 branches to MterpException; otherwise execution continues with
+     * the instruction 3 code units further on.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     x0, xSELF                       // x0<- xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME    // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                         // x2<- xPC
+    // Disabled alternative that would mask the value to its low 16 bits;
+    // the unmasked register is passed instead:
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                       // x3<- xINST
+    bl      MterpInvokeVirtualQuick
+    cbz     w0, MterpException              // helper returned 0
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/op_invoke_virtual_range_quick.S */
+/* File: arm64/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     *
+     * This expansion calls MterpInvokeVirtualQuickRange with
+     * (xSELF, xFP + OFF_FP_SHADOWFRAME, xPC, xINST).  A zero return value
+     * in w0 branches to MterpException; otherwise execution continues with
+     * the instruction 3 code units further on.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     x0, xSELF                       // x0<- xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME    // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xPC                         // x2<- xPC
+    // Disabled alternative that would mask the value to its low 16 bits;
+    // the unmasked register is passed instead:
+    // and     x3, xINST, 0xFFFF
+    mov     x3, xINST                       // x3<- xINST
+    bl      MterpInvokeVirtualQuickRange
+    cbz     w0, MterpException              // helper returned 0
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/op_iput_boolean_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /*
+     * Quickened instance-field write: field at byte offset CCCC of the
+     * object referenced by vB <- vA.  This expansion uses "strb" (stores
+     * the low 8 bits of w0).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    // cbz tests the register directly, so no preceding cmp is required.
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm64/op_iput_byte_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /*
+     * Quickened instance-field write: field at byte offset CCCC of the
+     * object referenced by vB <- vA.  This expansion uses "strb" (stores
+     * the low 8 bits of w0).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    // cbz tests the register directly, so no preceding cmp is required.
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strb     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm64/op_iput_char_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /*
+     * Quickened instance-field write: field at byte offset CCCC of the
+     * object referenced by vB <- vA.  This expansion uses "strh" (stores
+     * the low 16 bits of w0).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    // cbz tests the register directly, so no preceding cmp is required.
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm64/op_iput_short_quick.S */
+/* File: arm64/op_iput_quick.S */
+    /*
+     * Quickened instance-field write: field at byte offset CCCC of the
+     * object referenced by vB <- vA.  This expansion uses "strh" (stores
+     * the low 16 bits of w0).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iput-quick, iput-boolean-quick, iput-byte-quick, iput-char-quick, iput-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- fp[B], the object pointer
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    // cbz tests the register directly, so no preceding cmp is required.
+    cbz     w3, common_errNullObject    // object was null
+    GET_VREG w0, w2                     // w0<- fp[A]
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    strh     w0, [x3, x1]             // obj.field<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/op_iget_boolean_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /*
+     * Quickened instance-field read: vA <- field at byte offset CCCC of the
+     * object referenced by vB.  This expansion uses "ldrb" (8-bit load,
+     * zero-extended).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null (64-bit view of the register filled via w3)
+    beq     common_errNullObject        // object was null
+    ldrb   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/op_iget_byte_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /*
+     * Quickened instance-field read: vA <- field at byte offset CCCC of the
+     * object referenced by vB.  This expansion uses "ldrsb" (8-bit load,
+     * sign-extended).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null (64-bit view of the register filled via w3)
+    beq     common_errNullObject        // object was null
+    ldrsb   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/op_iget_char_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /*
+     * Quickened instance-field read: vA <- field at byte offset CCCC of the
+     * object referenced by vB.  This expansion uses "ldrh" (16-bit load,
+     * zero-extended).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null (64-bit view of the register filled via w3)
+    beq     common_errNullObject        // object was null
+    ldrh   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/op_iget_short_quick.S */
+/* File: arm64/op_iget_quick.S */
+    /*
+     * Quickened instance-field read: vA <- field at byte offset CCCC of the
+     * object referenced by vB.  This expansion uses "ldrsh" (16-bit load,
+     * sign-extended).  A null object reference branches to
+     * common_errNullObject.
+     */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
+    /* op vA, vB, offset//CCCC */
+    lsr     w2, wINST, #12              // w2<- B
+    FETCH w1, 1                         // w1<- field byte offset
+    GET_VREG w3, w2                     // w3<- object we're operating on
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    cmp     x3, #0                      // check object for null (64-bit view of the register filled via w3)
+    beq     common_errNullObject        // object was null
+    ldrsh   w0, [x3, x1]                // w0<- obj.field
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    
+    SET_VREG w0, w2                     // fp[A]<- w0
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+/* invoke-lambda has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm64/op_unused_f4.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xf4 is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+/* capture-variable has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+/* create-lambda has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+/* liberate-variable has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+/* box-lambda has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+/* unbox-lambda has no handler body here: the slot only branches to MterpFallback. */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm64/op_unused_fa.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xfa is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm64/op_unused_fb.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xfb is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm64/op_unused_fc.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xfc is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm64/op_unused_fd.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xfd is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm64/op_unused_fe.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xfe is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm64/op_unused_ff.S */
+/* File: arm64/unused.S */
+/*
+ * Opcode 0xff is unused.
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/*
+ * End of the primary handler table: align to the next 128-byte boundary
+ * after the last handler, set the size of artMterpAsmInstructionStart to
+ * the distance from that symbol to here, and export the end label.
+ */
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+/*
+ * Exported start/end labels for the sister-implementation region.  No
+ * instructions are emitted between the two labels here.
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+/*
+ * Start of the alternate handler table.  The exported symbol is defined as
+ * an alias of .L_ALT_op_nop, the label of the first alternate stub, which
+ * is placed after a ".balign 128".
+ */
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 0
+ * (artMterpAsmInstructionStart + 0 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (0 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 1
+ * (artMterpAsmInstructionStart + 1 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (1 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 2
+ * (artMterpAsmInstructionStart + 2 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (2 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 3
+ * (artMterpAsmInstructionStart + 3 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (3 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 4
+ * (artMterpAsmInstructionStart + 4 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (4 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 5
+ * (artMterpAsmInstructionStart + 5 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (5 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 6
+ * (artMterpAsmInstructionStart + 6 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (6 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 7
+ * (artMterpAsmInstructionStart + 7 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (7 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 8
+ * (artMterpAsmInstructionStart + 8 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (8 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 9
+ * (artMterpAsmInstructionStart + 9 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (9 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 10
+ * (artMterpAsmInstructionStart + 10 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (10 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 11
+ * (artMterpAsmInstructionStart + 11 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (11 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 12
+ * (artMterpAsmInstructionStart + 12 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (12 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 13
+ * (artMterpAsmInstructionStart + 13 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (13 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 14
+ * (artMterpAsmInstructionStart + 14 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (14 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 15
+ * (artMterpAsmInstructionStart + 15 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (15 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ *
+ * Before the branch, lr is set to the address of primary handler slot 16
+ * (artMterpAsmInstructionStart + 16 * 128), so that a normal return from
+ * MterpCheckBefore lands in the real handler for this opcode.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (16 * 128)       // Addr of primary handler.
+    mov    x0, xSELF                        // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_return_object (opcode 0x11).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (17 * 128)       // lr = addr of primary handler: entry 17, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_4 (opcode 0x12).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (18 * 128)       // lr = addr of primary handler: entry 18, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_16 (opcode 0x13).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (19 * 128)       // lr = addr of primary handler: entry 19, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const (opcode 0x14).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (20 * 128)       // lr = addr of primary handler: entry 20, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_high16 (opcode 0x15).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (21 * 128)       // lr = addr of primary handler: entry 21, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_wide_16 (opcode 0x16).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (22 * 128)       // lr = addr of primary handler: entry 22, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_wide_32 (opcode 0x17).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (23 * 128)       // lr = addr of primary handler: entry 23, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_wide (opcode 0x18).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (24 * 128)       // lr = addr of primary handler: entry 24, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_wide_high16 (opcode 0x19).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (25 * 128)       // lr = addr of primary handler: entry 25, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_string (opcode 0x1a).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (26 * 128)       // lr = addr of primary handler: entry 26, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_string_jumbo (opcode 0x1b).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (27 * 128)       // lr = addr of primary handler: entry 27, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_const_class (opcode 0x1c).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (28 * 128)       // lr = addr of primary handler: entry 28, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_monitor_enter (opcode 0x1d).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (29 * 128)       // lr = addr of primary handler: entry 29, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_monitor_exit (opcode 0x1e).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (30 * 128)       // lr = addr of primary handler: entry 30, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_check_cast (opcode 0x1f).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (31 * 128)       // lr = addr of primary handler: entry 31, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_instance_of (opcode 0x20).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (32 * 128)       // lr = addr of primary handler: entry 32, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_array_length (opcode 0x21).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (33 * 128)       // lr = addr of primary handler: entry 33, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_new_instance (opcode 0x22).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (34 * 128)       // lr = addr of primary handler: entry 34, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_new_array (opcode 0x23).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (35 * 128)       // lr = addr of primary handler: entry 35, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_filled_new_array (opcode 0x24).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (36 * 128)       // lr = addr of primary handler: entry 36, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_filled_new_array_range (opcode 0x25).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (37 * 128)       // lr = addr of primary handler: entry 37, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_fill_array_data (opcode 0x26).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (38 * 128)       // lr = addr of primary handler: entry 38, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_throw (opcode 0x27).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (39 * 128)       // lr = addr of primary handler: entry 39, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_goto (opcode 0x28).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (40 * 128)       // lr = addr of primary handler: entry 40, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_goto_16 (opcode 0x29).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (41 * 128)       // lr = addr of primary handler: entry 41, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_goto_32 (opcode 0x2a).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (42 * 128)       // lr = addr of primary handler: entry 42, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_packed_switch (opcode 0x2b).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (43 * 128)       // lr = addr of primary handler: entry 43, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sparse_switch (opcode 0x2c).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (44 * 128)       // lr = addr of primary handler: entry 44, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_cmpl_float (opcode 0x2d).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (45 * 128)       // lr = addr of primary handler: entry 45, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_cmpg_float (opcode 0x2e).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (46 * 128)       // lr = addr of primary handler: entry 46, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_cmpl_double (opcode 0x2f).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (47 * 128)       // lr = addr of primary handler: entry 47, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_cmpg_double (opcode 0x30).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (48 * 128)       // lr = addr of primary handler: entry 48, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_cmp_long (opcode 0x31).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (49 * 128)       // lr = addr of primary handler: entry 49, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_eq (opcode 0x32).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (50 * 128)       // lr = addr of primary handler: entry 50, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_ne (opcode 0x33).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (51 * 128)       // lr = addr of primary handler: entry 51, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_lt (opcode 0x34).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (52 * 128)       // lr = addr of primary handler: entry 52, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_ge (opcode 0x35).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (53 * 128)       // lr = addr of primary handler: entry 53, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_gt (opcode 0x36).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (54 * 128)       // lr = addr of primary handler: entry 54, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_le (opcode 0x37).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (55 * 128)       // lr = addr of primary handler: entry 55, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_eqz (opcode 0x38).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (56 * 128)       // lr = addr of primary handler: entry 56, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_nez (opcode 0x39).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (57 * 128)       // lr = addr of primary handler: entry 57, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_ltz (opcode 0x3a).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (58 * 128)       // lr = addr of primary handler: entry 58, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_gez (opcode 0x3b).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (59 * 128)       // lr = addr of primary handler: entry 59, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_gtz (opcode 0x3c).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (60 * 128)       // lr = addr of primary handler: entry 60, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_if_lez (opcode 0x3d).
+ * Tail-calls MterpCheckBefore(self, shadow_frame) to handle any interesting
+ * requests, with lr preset so the check returns into the real handler.
+ */
+    .extern MterpCheckBefore                         // Declared here, defined outside this file.
+    EXPORT_PC                                        // NOTE(review): macro body not shown here; presumably publishes the current PC - confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // Refresh xIBASE from [xSELF + THREAD_CURRENT_IBASE_OFFSET].
+    adr    lr, artMterpAsmInstructionStart + (61 * 128)       // lr = addr of primary handler: entry 61, 128 bytes per entry.
+    mov    x0, xSELF                                 // arg0 = self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME              // arg1 = shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Tail call: plain b leaves lr aimed at the primary handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_3e (0x3e).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (62 * 128)       // lr = primary handler for 0x3e.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_3f (0x3f).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (63 * 128)       // lr = primary handler for 0x3f.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_40 (0x40).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (64 * 128)       // lr = primary handler for 0x40.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_41 (0x41).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (65 * 128)       // lr = primary handler for 0x41.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_42 (0x42).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (66 * 128)       // lr = primary handler for 0x42.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_unused_43 (0x43).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (67 * 128)       // lr = primary handler for 0x43.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget (0x44).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (68 * 128)       // lr = primary handler for 0x44.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_wide (0x45).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (69 * 128)       // lr = primary handler for 0x45.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_object (0x46).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (70 * 128)       // lr = primary handler for 0x46.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_boolean (0x47).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (71 * 128)       // lr = primary handler for 0x47.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_byte (0x48).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (72 * 128)       // lr = primary handler for 0x48.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_char (0x49).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (73 * 128)       // lr = primary handler for 0x49.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aget_short (0x4a).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (74 * 128)       // lr = primary handler for 0x4a.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput (0x4b).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (75 * 128)       // lr = primary handler for 0x4b.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_wide (0x4c).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (76 * 128)       // lr = primary handler for 0x4c.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_object (0x4d).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (77 * 128)       // lr = primary handler for 0x4d.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_boolean (0x4e).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (78 * 128)       // lr = primary handler for 0x4e.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_byte (0x4f).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (79 * 128)       // lr = primary handler for 0x4f.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_char (0x50).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (80 * 128)       // lr = primary handler for 0x50.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_aput_short (0x51).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (81 * 128)       // lr = primary handler for 0x51.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget (0x52).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (82 * 128)       // lr = primary handler for 0x52.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_wide (0x53).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (83 * 128)       // lr = primary handler for 0x53.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_object (0x54).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (84 * 128)       // lr = primary handler for 0x54.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_boolean (0x55).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (85 * 128)       // lr = primary handler for 0x55.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_byte (0x56).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (86 * 128)       // lr = primary handler for 0x56.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_char (0x57).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (87 * 128)       // lr = primary handler for 0x57.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iget_short (0x58).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (88 * 128)       // lr = primary handler for 0x58.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput (0x59).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (89 * 128)       // lr = primary handler for 0x59.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_wide (0x5a).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (90 * 128)       // lr = primary handler for 0x5a.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_object (0x5b).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (91 * 128)       // lr = primary handler for 0x5b.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_boolean (0x5c).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (92 * 128)       // lr = primary handler for 0x5c.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_byte (0x5d).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (93 * 128)       // lr = primary handler for 0x5d.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_char (0x5e).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (94 * 128)       // lr = primary handler for 0x5e.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_iput_short (0x5f).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (95 * 128)       // lr = primary handler for 0x5f.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget (0x60).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (96 * 128)       // lr = primary handler for 0x60.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_wide (0x61).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (97 * 128)       // lr = primary handler for 0x61.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_object (0x62).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (98 * 128)       // lr = primary handler for 0x62.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_boolean (0x63).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (99 * 128)       // lr = primary handler for 0x63.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_byte (0x64).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (100 * 128)       // lr = primary handler for 0x64.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_char (0x65).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (101 * 128)       // lr = primary handler for 0x65.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sget_short (0x66).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (102 * 128)       // lr = primary handler for 0x66.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sput (0x67).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (103 * 128)       // lr = primary handler for 0x67.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sput_wide (0x68).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (104 * 128)       // lr = primary handler for 0x68.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sput_object (0x69).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (105 * 128)       // lr = primary handler for 0x69.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sput_boolean (0x6a).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) with lr preset to the primary handler,
+ * so a normal return from that call resumes in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (106 * 128)       // lr = primary handler for 0x6a.
+    mov    x0, xSELF                         // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME      // arg1: shadow_frame
+    b      MterpCheckBefore     // Tail call: b (not bl) leaves lr on the handler.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x6b.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (107 * 128)       // lr = primary handler, slot 0x6b.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x6c.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (108 * 128)       // lr = primary handler, slot 0x6c.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x6d.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (109 * 128)       // lr = primary handler, slot 0x6d.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x6e.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (110 * 128)       // lr = primary handler, slot 0x6e.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x6f.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (111 * 128)       // lr = primary handler, slot 0x6f.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x70.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (112 * 128)       // lr = primary handler, slot 0x70.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x71.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (113 * 128)       // lr = primary handler, slot 0x71.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x72.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (114 * 128)       // lr = primary handler, slot 0x72.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x73.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (115 * 128)       // lr = primary handler, slot 0x73.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x74.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (116 * 128)       // lr = primary handler, slot 0x74.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x75.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (117 * 128)       // lr = primary handler, slot 0x75.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x76.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (118 * 128)       // lr = primary handler, slot 0x76.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x77.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (119 * 128)       // lr = primary handler, slot 0x77.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x78.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (120 * 128)       // lr = primary handler, slot 0x78.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x79.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (121 * 128)       // lr = primary handler, slot 0x79.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7a.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (122 * 128)       // lr = primary handler, slot 0x7a.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7b.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (123 * 128)       // lr = primary handler, slot 0x7b.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7c.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (124 * 128)       // lr = primary handler, slot 0x7c.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7d.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (125 * 128)       // lr = primary handler, slot 0x7d.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7e.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (126 * 128)       // lr = primary handler, slot 0x7e.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x7f.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (127 * 128)       // lr = primary handler, slot 0x7f.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x80.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (128 * 128)       // lr = primary handler, slot 0x80.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x81.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (129 * 128)       // lr = primary handler, slot 0x81.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x82.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (130 * 128)       // lr = primary handler, slot 0x82.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x83.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (131 * 128)       // lr = primary handler, slot 0x83.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x84.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (132 * 128)       // lr = primary handler, slot 0x84.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x85.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (133 * 128)       // lr = primary handler, slot 0x85.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x86.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (134 * 128)       // lr = primary handler, slot 0x86.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x87.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (135 * 128)       // lr = primary handler, slot 0x87.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x88.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (136 * 128)       // lr = primary handler, slot 0x88.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x89.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (137 * 128)       // lr = primary handler, slot 0x89.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8a.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (138 * 128)       // lr = primary handler, slot 0x8a.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8b.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (139 * 128)       // lr = primary handler, slot 0x8b.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8c.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (140 * 128)       // lr = primary handler, slot 0x8c.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8d.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (141 * 128)       // lr = primary handler, slot 0x8d.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8e.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (142 * 128)       // lr = primary handler, slot 0x8e.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x8f.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (143 * 128)       // lr = primary handler, slot 0x8f.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x90.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (144 * 128)       // lr = primary handler, slot 0x90.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x91.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (145 * 128)       // lr = primary handler, slot 0x91.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x92.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (146 * 128)       // lr = primary handler, slot 0x92.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x93.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (147 * 128)       // lr = primary handler, slot 0x93.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x94.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (148 * 128)       // lr = primary handler, slot 0x94.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x95.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (149 * 128)       // lr = primary handler, slot 0x95.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x96.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (150 * 128)       // lr = primary handler, slot 0x96.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for opcode 0x97.  Tail-calls
+ * MterpCheckBefore to handle any interesting requests; lr is preset so
+ * that call returns directly into the real (primary) instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // reload xIBASE from xSELF.
+    adr    lr, artMterpAsmInstructionStart + (151 * 128)       // lr = primary handler, slot 0x97.
+    mov    x0, xSELF                      // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame.
+    b      MterpCheckBefore     // (self, shadow_frame); b, not bl: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shl_int (opcode 0x98).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (152 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shr_int (opcode 0x99).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (153 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_ushr_int (opcode 0x9a).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (154 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_add_long (opcode 0x9b).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (155 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sub_long (opcode 0x9c).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (156 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_mul_long (opcode 0x9d).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (157 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_div_long (opcode 0x9e).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (158 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_rem_long (opcode 0x9f).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (159 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_and_long (opcode 0xa0).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (160 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_or_long (opcode 0xa1).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (161 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_xor_long (opcode 0xa2).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (162 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shl_long (opcode 0xa3).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (163 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shr_long (opcode 0xa4).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (164 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_ushr_long (opcode 0xa5).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (165 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_add_float (opcode 0xa6).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (166 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sub_float (opcode 0xa7).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (167 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_mul_float (opcode 0xa8).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (168 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_div_float (opcode 0xa9).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (169 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_rem_float (opcode 0xaa).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (170 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_add_double (opcode 0xab).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (171 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sub_double (opcode 0xac).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (172 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_mul_double (opcode 0xad).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (173 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_div_double (opcode 0xae).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (174 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_rem_double (opcode 0xaf).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (175 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_add_int_2addr (opcode 0xb0).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (176 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sub_int_2addr (opcode 0xb1).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (177 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_mul_int_2addr (opcode 0xb2).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (178 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_div_int_2addr (opcode 0xb3).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (179 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_rem_int_2addr (opcode 0xb4).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (180 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_and_int_2addr (opcode 0xb5).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (181 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_or_int_2addr (opcode 0xb6).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (182 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_xor_int_2addr (opcode 0xb7).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (183 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shl_int_2addr (opcode 0xb8).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (184 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shr_int_2addr (opcode 0xb9).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (185 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_ushr_int_2addr (opcode 0xba).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (186 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_add_long_2addr (opcode 0xbb).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (187 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_sub_long_2addr (opcode 0xbc).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (188 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_mul_long_2addr (opcode 0xbd).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (189 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_div_long_2addr (opcode 0xbe).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (190 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_rem_long_2addr (opcode 0xbf).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (191 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_and_long_2addr (opcode 0xc0).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (192 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_or_long_2addr (opcode 0xc1).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (193 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_xor_long_2addr (opcode 0xc2).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (194 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shl_long_2addr (opcode 0xc3).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (195 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub for op_shr_long_2addr (opcode 0xc4).  Tail-calls
+ * MterpCheckBefore(self, shadow_frame) to handle any interesting requests; lr
+ * is preset so that its return lands in the real instruction handler.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                               // macro defined elsewhere; presumably saves the dex PC -- confirm.
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (196 * 128)       // lr = primary handler: base + opcode * 128.
+    mov    x0, xSELF                        // arg0: self.
+    add    x1, xFP, #OFF_FP_SHADOWFRAME     // arg1: shadow_frame (xFP + OFF_FP_SHADOWFRAME).
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (197 * 128)       // lr = primary handler for opcode 0xc5, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (198 * 128)       // lr = primary handler for opcode 0xc6, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (199 * 128)       // lr = primary handler for opcode 0xc7, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (200 * 128)       // lr = primary handler for opcode 0xc8, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (201 * 128)       // lr = primary handler for opcode 0xc9, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (202 * 128)       // lr = primary handler for opcode 0xca, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (203 * 128)       // lr = primary handler for opcode 0xcb, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (204 * 128)       // lr = primary handler for opcode 0xcc, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (205 * 128)       // lr = primary handler for opcode 0xcd, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (206 * 128)       // lr = primary handler for opcode 0xce, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (207 * 128)       // lr = primary handler for opcode 0xcf, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (208 * 128)       // lr = primary handler for opcode 0xd0, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (209 * 128)       // lr = primary handler for opcode 0xd1, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (210 * 128)       // lr = primary handler for opcode 0xd2, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (211 * 128)       // lr = primary handler for opcode 0xd3, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (212 * 128)       // lr = primary handler for opcode 0xd4, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (213 * 128)       // lr = primary handler for opcode 0xd5, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (214 * 128)       // lr = primary handler for opcode 0xd6, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (215 * 128)       // lr = primary handler for opcode 0xd7, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (216 * 128)       // lr = primary handler for opcode 0xd8, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (217 * 128)       // lr = primary handler for opcode 0xd9, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (218 * 128)       // lr = primary handler for opcode 0xda, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (219 * 128)       // lr = primary handler for opcode 0xdb, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (220 * 128)       // lr = primary handler for opcode 0xdc, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (221 * 128)       // lr = primary handler for opcode 0xdd, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (222 * 128)       // lr = primary handler for opcode 0xde, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (223 * 128)       // lr = primary handler for opcode 0xdf, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (224 * 128)       // lr = primary handler for opcode 0xe0, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (225 * 128)       // lr = primary handler for opcode 0xe1, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (226 * 128)       // lr = primary handler for opcode 0xe2, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (227 * 128)       // lr = primary handler for opcode 0xe3, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (228 * 128)       // lr = primary handler for opcode 0xe4, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (229 * 128)       // lr = primary handler for opcode 0xe5, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (230 * 128)       // lr = primary handler for opcode 0xe6, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (231 * 128)       // lr = primary handler for opcode 0xe7, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (232 * 128)       // lr = primary handler for opcode 0xe8, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (233 * 128)       // lr = primary handler for opcode 0xe9, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (234 * 128)       // lr = primary handler for opcode 0xea, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (235 * 128)       // lr = primary handler for opcode 0xeb, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (236 * 128)       // lr = primary handler for opcode 0xec, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (237 * 128)       // lr = primary handler for opcode 0xed, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (238 * 128)       // lr = primary handler for opcode 0xee, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (239 * 128)       // lr = primary handler for opcode 0xef, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (240 * 128)       // lr = primary handler for opcode 0xf0, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128                // align the stub's start to a 128-byte boundary
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC                  // macro defined elsewhere; presumably records the current dex PC -- confirm
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (241 * 128)       // lr = primary handler for opcode 0xf1, so MterpCheckBefore returns into it.
+    mov    x0, xSELF           // arg0: self
+    add    x1, xFP, #OFF_FP_SHADOWFRAME   // arg1: shadow_frame
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (242 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (243 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (244 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (245 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (246 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (247 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (248 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (249 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (250 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (251 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (252 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (253 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (254 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm64/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]            // refresh IBASE.
+    adr    lr, artMterpAsmInstructionStart + (255 * 128)       // Addr of primary handler.
+    mov    x0, xSELF
+    add    x1, xFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     // (self, shadow_frame) Note: tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
+/* File: arm64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    str     xPC, [xFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in w7.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    check1
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement pending.
+ * Branch offset in wINST on entry.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm x2, xINST, 0, 31
+    bl MterpLogOSR
+#endif
+    b MterpFallback                     // Let the reference interpreter deal with it.
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF
+    add  x1, xFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    str     x0, [x2]
+    mov     x0, xSELF
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+    ldp     fp, lr, [sp, #48]
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+

diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
index 8b26976..ac87945 100755
--- a/runtime/interpreter/mterp/rebuild.sh
+++ b/runtime/interpreter/mterp/rebuild.sh

@@ -21,4 +21,4 @@
 set -e
 
 # for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
-for arch in arm x86; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm x86 arm64 ; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done

diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 4e0146c..fa5c41d 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc

@@ -239,7 +239,7 @@
 
 void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
   struct CollectClasses : public ClassVisitor {
-    bool Visit(mirror::Class* klass) override {
+    bool operator()(mirror::Class* klass) override {
       classes_.push_back(klass);
       return true;
     }

diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b3439f7..c6fa15d 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h

@@ -370,15 +370,17 @@
   }
 }
 
-template<typename T>
+template<typename T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T PointerArray::GetElementPtrSize(uint32_t idx, size_t ptr_size) {
   // C style casts here since we sometimes have T be a pointer, or sometimes an integer
   // (for stack traces).
   if (ptr_size == 8) {
-    return (T)static_cast<uintptr_t>(AsLongArray()->GetWithoutChecks(idx));
+    return (T)static_cast<uintptr_t>(
+        AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
   DCHECK_EQ(ptr_size, 4u);
-  return (T)static_cast<uintptr_t>(AsIntArray()->GetWithoutChecks(idx));
+  return (T)static_cast<uintptr_t>(
+      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
 }
 
 template<bool kTransactionActive, bool kUnchecked>
@@ -401,12 +403,12 @@
                                                     ptr_size);
 }
 
-template <typename Visitor>
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void PointerArray::Fixup(mirror::PointerArray* dest,
                                 size_t pointer_size,
                                 const Visitor& visitor) {
   for (size_t i = 0, count = GetLength(); i < count; ++i) {
-    void* ptr = GetElementPtrSize<void*>(i, pointer_size);
+    void* ptr = GetElementPtrSize<void*, kVerifyFlags, kReadBarrierOption>(i, pointer_size);
     void* new_ptr = visitor(ptr);
     if (ptr != new_ptr) {
       dest->SetElementPtrSize<false, true>(i, new_ptr, pointer_size);

diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 2bd6c5b..9a21ec2 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h

@@ -183,7 +183,9 @@
 // Either an IntArray or a LongArray.
 class PointerArray : public Array {
  public:
-  template<typename T>
+  template<typename T,
+           VerifyObjectFlags kVerifyFlags = kVerifyNone,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   T GetElementPtrSize(uint32_t idx, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -196,7 +198,9 @@
 
   // Fixup the pointers in the dest arrays by passing our pointers through the visitor. Only copies
   // to dest if visitor(source_ptr) != source_ptr.
-  template <typename Visitor>
+  template <VerifyObjectFlags kVerifyFlags = kVerifyNone,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void Fixup(mirror::PointerArray* dest, size_t pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 };

diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index d5783c0..422832e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h

@@ -253,14 +253,16 @@
       EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
   return GetFieldPtrWithSize<ArtMethod*>(
       EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
-  DCHECK(ShouldHaveEmbeddedImtAndVTable());
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
   SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
 }
 
@@ -538,10 +540,11 @@
       : ClassOffset();
 }
 
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -1057,7 +1060,7 @@
   return arr != nullptr ? arr->size() : 0u;
 }
 
-template <typename Visitor>
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
 inline void Class::FixupNativePointers(mirror::Class* dest,
                                        size_t pointer_size,
                                        const Visitor& visitor) {
@@ -1085,7 +1088,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable()) {
+  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1094,10 +1097,13 @@
       }
     }
     for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      ArtMethod* method = GetEmbeddedImTableEntry(i, pointer_size);
+      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                                    pointer_size);
       ArtMethod* new_method = visitor(method);
       if (method != new_method) {
-        dest->SetEmbeddedImTableEntry(i, new_method, pointer_size);
+        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                        new_method,
+                                                                        pointer_size);
       }
     }
   }

diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 18e9c5f..388a231 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h

@@ -448,7 +448,6 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-
   bool IsArrayClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -493,9 +492,11 @@
     return !IsPrimitive() && !IsInterface() && !IsAbstract() && !IsArrayClass();
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsInstantiable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return (!IsPrimitive() && !IsInterface() && !IsAbstract()) ||
-        (IsAbstract() && IsArrayClass());
+        (IsAbstract() && IsArrayClass<kVerifyFlags, kReadBarrierOption>());
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -814,8 +815,10 @@
     return MemberOffset(sizeof(Class));
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsInstantiable();
+    return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -824,9 +827,13 @@
 
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1019,6 +1026,8 @@
   }
 
   // Get the offset of the first reference static field. Other reference static fields follow.
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   MemberOffset GetFirstReferenceStaticFieldOffset(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1242,7 +1251,9 @@
   // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
   // initialized to a copy of *this to prevent issues. Does not visit the ArtMethod and ArtField
   // roots.
-  template <typename Visitor>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            typename Visitor>
   void FixupNativePointers(mirror::Class* dest, size_t pointer_size, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 

diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index 605deac..d6571f2 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h

@@ -43,8 +43,11 @@
     return method_array;
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetMethodArrayCount(int32_t i) SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto* method_array = down_cast<PointerArray*>(Get((i * kMax) + kMethodArray));
+    auto* method_array = down_cast<PointerArray*>(
+        Get<kVerifyFlags, kReadBarrierOption>((i * kMax) + kMethodArray));
     return method_array == nullptr ? 0u : method_array->GetLength();
   }
 

diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 760de9a..eb391be 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h

@@ -255,16 +255,17 @@
   return down_cast<Class*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsObjectArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  return IsArrayInstance<kVerifyFlags>() &&
-      !GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitive();
+  return IsArrayInstance<kVerifyFlags, kReadBarrierOption>() &&
+      !GetClass<kNewFlags, kReadBarrierOption>()->
+          template GetComponentType<kNewFlags, kReadBarrierOption>()->IsPrimitive();
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ObjectArray<T>* Object::AsObjectArray() {
-  DCHECK(IsObjectArray<kVerifyFlags>());
+  DCHECK((IsObjectArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<ObjectArray<T>*>(this);
 }
 
@@ -274,14 +275,14 @@
       template IsArrayClass<kVerifyFlags, kReadBarrierOption>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsReferenceInstance() {
-  return GetClass<kVerifyFlags>()->IsTypeOfReferenceClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsTypeOfReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Reference* Object::AsReference() {
-  DCHECK(IsReferenceInstance<kVerifyFlags>());
+  DCHECK((IsReferenceInstance<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Reference*>(this);
 }
 
@@ -341,29 +342,31 @@
   return down_cast<ShortArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsIntArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveInt<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline IntArray* Object::AsIntArray() {
-  DCHECK(IsIntArray<kVerifyFlags>());
+  DCHECK((IsIntArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<IntArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsLongArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  auto* component_type = GetClass<kVerifyFlags>()->GetComponentType();
+  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveLong<kNewFlags>();
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline LongArray* Object::AsLongArray() {
-  DCHECK(IsLongArray<kVerifyFlags>());
+  DCHECK((IsLongArray<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<LongArray*>(this);
 }
 
@@ -1063,7 +1066,7 @@
       // Presumably GC can happen when we are cross compiling, it should not cause performance
       // problems to do pointer size logic.
       MemberOffset field_offset = kIsStatic
-          ? klass->GetFirstReferenceStaticFieldOffset(
+          ? klass->GetFirstReferenceStaticFieldOffset<kVerifyFlags, kReadBarrierOption>(
               Runtime::Current()->GetClassLinker()->GetImagePointerSize())
           : klass->GetFirstReferenceInstanceFieldOffset();
       for (size_t i = 0u; i < num_reference_fields; ++i) {
@@ -1123,26 +1126,26 @@
   visitor(this, ClassOffset(), false);
   const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
   if (LIKELY(class_flags == kClassFlagNormal)) {
-    DCHECK(!klass->IsVariableSize());
-    VisitInstanceFieldsReferences(klass, visitor);
+    DCHECK((!klass->IsVariableSize<kVerifyFlags, kReadBarrierOption>()));
+    VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
     DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
     DCHECK(!klass->IsStringClass());
     DCHECK(!klass->IsClassLoaderClass());
-    DCHECK(!klass->IsArrayClass());
+    DCHECK((!klass->IsArrayClass<kVerifyFlags, kReadBarrierOption>()));
   } else {
     if ((class_flags & kClassFlagNoReferenceFields) == 0) {
       DCHECK(!klass->IsStringClass());
       if (class_flags == kClassFlagClass) {
-        DCHECK(klass->IsClassClass());
-        AsClass<kVerifyNone>()->VisitReferences<kVisitNativeRoots,
-                                                kVerifyFlags,
-                                                kReadBarrierOption>(klass, visitor);
+        DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+        mirror::Class* as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
+        as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
+                                                                                       visitor);
       } else if (class_flags == kClassFlagObjectArray) {
         DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
-        AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences(visitor);
+        AsObjectArray<mirror::Object, kVerifyNone, kReadBarrierOption>()->VisitReferences(visitor);
       } else if ((class_flags & kClassFlagReference) != 0) {
-        VisitInstanceFieldsReferences(klass, visitor);
-        ref_visitor(klass, AsReference());
+        VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+        ref_visitor(klass, AsReference<kVerifyFlags, kReadBarrierOption>());
       } else if (class_flags == kClassFlagDexCache) {
         mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
         dex_cache->VisitReferences<kVisitNativeRoots,

diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index d635002..3f739df 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h

@@ -159,9 +159,12 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* AsClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<class T,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjectArray<T>* AsObjectArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -199,14 +202,18 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ShortArray* AsShortSizedArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   IntArray* AsIntArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   LongArray* AsLongArray() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -230,9 +237,11 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Throwable* AsThrowable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* AsReference() SHARED_REQUIRES(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsWeakReferenceInstance() SHARED_REQUIRES(Locks::mutator_lock_);

diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index bd4a9c1..12bfe38 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h

@@ -27,14 +27,6 @@
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
-inline bool Reference::IsEnqueuable() {
-  // Not using volatile reads as an optimization since this is only called with all the mutators
-  // suspended.
-  const Object* queue = GetFieldObject<mirror::Object>(QueueOffset());
-  const Object* queue_next = GetFieldObject<mirror::Object>(QueueNextOffset());
-  return queue != nullptr && queue_next == nullptr;
-}
-
 }  // namespace mirror
 }  // namespace art
 

diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 5e467ab..3baa12e 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h

@@ -75,9 +75,7 @@
   void ClearReferent() SHARED_REQUIRES(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
-  // Volatile read/write is not necessary since the java pending next is only accessed from
-  // the java threads for cleared references. Once these cleared references have a null referent,
-  // we never end up reading their pending next from the GC again.
+
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldObject<Reference>(PendingNextOffset());
   }
@@ -91,14 +89,22 @@
     }
   }
 
-  bool IsEnqueued() SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Since the references are stored as cyclic lists it means that once enqueued, the pending
-    // next is always non-null.
-    return GetPendingNext() != nullptr;
+  // Returns true if the reference's pendingNext is null, indicating it is
+  // okay to process this reference.
+  //
+  // If pendingNext is not null, then one of the following cases holds:
+  // 1. The reference has already been enqueued to a java ReferenceQueue. In
+  // this case the referent should not be considered for reference processing
+  // ever again.
+  // 2. The reference is currently part of a list of references that may
+  // shortly be enqueued on a java ReferenceQueue. In this case the reference
+  // should not be processed again until and unless the reference has been
+  // removed from the list after having determined the reference is not ready
+  // to be enqueued on a java ReferenceQueue.
+  bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetPendingNext() == nullptr;
   }
 
-  bool IsEnqueuable() SHARED_REQUIRES(Locks::mutator_lock_);
-
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetJavaLangRefReference() SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(!java_lang_ref_Reference_.IsNull());
@@ -115,9 +121,9 @@
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  HeapReference<Reference> pending_next_;  // Note this is Java volatile:
-  HeapReference<Object> queue_;  // Note this is Java volatile:
-  HeapReference<Reference> queue_next_;  // Note this is Java volatile:
+  HeapReference<Reference> pending_next_;
+  HeapReference<Object> queue_;
+  HeapReference<Reference> queue_next_;
   HeapReference<Object> referent_;  // Note this is Java volatile:
 
   static GcRoot<Class> java_lang_ref_Reference_;

diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index aa64ee3..2ea4b14 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc

@@ -277,6 +277,8 @@
           .WithType<ExperimentalFlags>()
           .AppendValues()
           .IntoKey(M::Experimental)
+      .Define("-Xforce-nb-testing")
+          .IntoKey(M::ForceNativeBridge)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",

diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 17306c9..6b84c8f 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc

@@ -108,7 +108,7 @@
 
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      if (method != nullptr) {
+      if (result != nullptr) {
         result->opcode = kInlineOpNop;
         result->flags = kInlineSpecial;
         result->d.data = 0u;

diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 19cf759..0c3eb3b 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h

@@ -32,50 +32,61 @@
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
-    // is used to create artificial data dependency from the is_gray
-    // load to the ref field (ptr) load to avoid needing a load-load
-    // barrier between the two.
-    uintptr_t rb_ptr_high_bits;
-    bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
-    ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-        rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    MirrorType* old_ref = ref;
-    if (is_gray) {
-      // Slow-path.
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-      // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
-      // updates before us, but it's ok.
-      if (kAlwaysUpdateField && ref != old_ref) {
-        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
-            offset, old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    if (kEnableReadBarrierInvariantChecks) {
-      CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-    }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref_addr->AsMirrorPtr();
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    MirrorType* ref = ref_addr->AsMirrorPtr();
-    MirrorType* old_ref = ref;
-    // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
-            offset, old_ref, ref);
+    if (kUseBakerReadBarrier) {
+      // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
+      // is used to create artificial data dependency from the is_gray
+      // load to the ref field (ptr) load to avoid needing a load-load
+      // barrier between the two.
+      uintptr_t rb_ptr_high_bits;
+      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
+          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      if (is_gray) {
+        // Slow-path.
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+        // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
+        // updates before us, but it's ok.
+        if (kAlwaysUpdateField && ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
       }
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref_addr->AsMirrorPtr();
+    } else if (kUseTableLookupReadBarrier) {
+      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* old_ref = ref;
+      // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
+      gc::Heap* heap = Runtime::Current()->GetHeap();
+      if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+              offset, old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(obj, offset, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
     }
-    AssertToSpaceInvariant(obj, offset, ref);
-    return ref;
   } else {
     // No read barrier.
     return ref_addr->AsMirrorPtr();
@@ -87,32 +98,43 @@
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = *root;
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
-  if (with_read_barrier && kUseBakerReadBarrier) {
-    // TODO: separate the read barrier code from the collector code more.
-    Thread* self = Thread::Current();
-    if (self != nullptr && self->GetIsGcMarking()) {
-      ref = reinterpret_cast<MirrorType*>(Mark(ref));
-    }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
-  } else if (with_read_barrier && kUseBrooksReadBarrier) {
-    // To be implemented.
-    return ref;
-  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    Thread* self = Thread::Current();
-    if (self != nullptr &&
-        self->GetIsGcMarking() &&
-        Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
-      MirrorType* old_ref = ref;
-      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
-      // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      if (ref != old_ref) {
-        Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-        atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
+  if (kUseReadBarrier && with_read_barrier) {
+    if (kIsDebugBuild) {
+      Thread* const self = Thread::Current();
+      if (self != nullptr) {
+        CHECK_EQ(self->GetDebugDisallowReadBarrierCount(), 0u);
       }
     }
-    AssertToSpaceInvariant(gc_root_source, ref);
-    return ref;
+    if (kUseBakerReadBarrier) {
+      // TODO: separate the read barrier code from the collector code more.
+      Thread* self = Thread::Current();
+      if (self != nullptr && self->GetIsGcMarking()) {
+        ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else if (kUseBrooksReadBarrier) {
+      // To be implemented.
+      return ref;
+    } else if (kUseTableLookupReadBarrier) {
+      Thread* self = Thread::Current();
+      if (self != nullptr &&
+          self->GetIsGcMarking() &&
+          Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+        MirrorType* old_ref = ref;
+        ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+        // Update the field atomically. This may fail if mutator updates before us, but it's ok.
+        if (ref != old_ref) {
+          Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+          atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
+        }
+      }
+      AssertToSpaceInvariant(gc_root_source, ref);
+      return ref;
+    } else {
+      LOG(FATAL) << "Unexpected read barrier type";
+      UNREACHABLE();
+    }
   } else {
     return ref;
   }

diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3926f06..b1b7473 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc

@@ -602,9 +602,12 @@
     if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
+    NativeBridgeAction action = force_native_bridge_
+        ? NativeBridgeAction::kInitialize
+        : NativeBridgeAction::kUnload;
     InitNonZygoteOrPostFork(self->GetJniEnv(),
                             /* is_system_server */ false,
-                            NativeBridgeAction::kInitialize,
+                            action,
                             GetInstructionSetString(kRuntimeISA));
   }
 
@@ -939,6 +942,7 @@
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
+  force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
   Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
 

diff --git a/runtime/runtime.h b/runtime/runtime.h
index c8c2ee5..bec26f8 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h

@@ -774,6 +774,9 @@
   // building a statically link version of dex2oat.
   bool no_sig_chain_;
 
+  // Force the use of native bridge even if the app ISA matches the runtime ISA.
+  bool force_native_bridge_;
+
   // Whether or not a native bridge has been loaded.
   //
   // The native bridge allows running native code compiled for a foreign ISA. The way it works is,

diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 308f3ba..097bccb 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def

@@ -92,6 +92,7 @@
 
 RUNTIME_OPTIONS_KEY (Unit,                DisableExplicitGC)
 RUNTIME_OPTIONS_KEY (Unit,                NoSigChain)
+RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)

diff --git a/runtime/thread.cc b/runtime/thread.cc
index 21241d2..2abcd67 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc

@@ -2704,7 +2704,7 @@
   // Visiting the declaring class is necessary so that we don't unload the class of a method that
   // is executing. We need to ensure that the code stays mapped.
   void VisitDeclaringClass(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::Class* klass = method->GetDeclaringClassNoBarrier();
+    mirror::Class* klass = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
     // klass can be null for runtime methods.
     if (klass != nullptr) {
       mirror::Object* new_ref = klass;

diff --git a/runtime/thread.h b/runtime/thread.h
index b25bcb2..d7887ca 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h

@@ -1067,6 +1067,14 @@
 
   void InitStringEntryPoints();
 
+  void ModifyDebugDisallowReadBarrier(int8_t delta) {
+    debug_disallow_read_barrier_ += delta;
+  }
+
+  uint8_t GetDebugDisallowReadBarrierCount() const {
+    return debug_disallow_read_barrier_;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1446,6 +1454,9 @@
   // Thread "interrupted" status; stays raised until queried or thrown.
   bool interrupted_ GUARDED_BY(wait_mutex_);
 
+  // Debug disallow read barrier count, only checked for debug builds and only in the runtime.
+  uint8_t debug_disallow_read_barrier_ = 0;
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
@@ -1493,6 +1504,20 @@
   DISALLOW_COPY_AND_ASSIGN(ScopedStackedShadowFramePusher);
 };
 
+// Disallows read barriers on |self| while in scope. Only enforced in debug builds.
+class ScopedDebugDisallowReadBarriers {
+ public:
+  explicit ScopedDebugDisallowReadBarriers(Thread* self) : self_(self) {
+    self_->ModifyDebugDisallowReadBarrier(1);
+  }
+  ~ScopedDebugDisallowReadBarriers() {
+    self_->ModifyDebugDisallowReadBarrier(-1);
+  }
+
+ private:
+  Thread* const self_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
 

diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index f6ee6a2..6922564 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h

@@ -31,14 +31,14 @@
                                                   const DexFile::Header& header)
     : pointer_size_(pointer_size),
       /* types_offset_ is always 0u, so it's constexpr */
-      methods_offset_(types_offset_ +
-                      RoundUp(TypesSize(header.type_ids_size_), MethodsAlignment())),
-      strings_offset_(methods_offset_ +
-                      RoundUp(MethodsSize(header.method_ids_size_), StringsAlignment())),
-      fields_offset_(strings_offset_ +
-                     RoundUp(StringsSize(header.string_ids_size_), FieldsAlignment())),
-      size_(fields_offset_ +
-            RoundUp(FieldsSize(header.field_ids_size_), Alignment())) {
+      methods_offset_(
+          RoundUp(types_offset_ + TypesSize(header.type_ids_size_), MethodsAlignment())),
+      strings_offset_(
+          RoundUp(methods_offset_ + MethodsSize(header.method_ids_size_), StringsAlignment())),
+      fields_offset_(
+          RoundUp(strings_offset_ + StringsSize(header.string_ids_size_), FieldsAlignment())),
+      size_(
+          RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), Alignment())) {
   DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
 }
 

diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run
index ea2045b..aeb5721 100644
--- a/test/115-native-bridge/run
+++ b/test/115-native-bridge/run

@@ -28,4 +28,4 @@
 LEFT=$(echo ${ARGS} | sed -r 's/-Djava.library.path.*//')
 RIGHT=$(echo ${ARGS} | sed -r 's/.*Djava.library.path[^ ]* //')
 MODARGS="${LEFT} -Djava.library.path=`pwd` ${RIGHT}"
-exec ${RUN} --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain
+exec ${RUN} --runtime-option -Xforce-nb-testing --runtime-option -XX:NativeBridge=libnativebridgetest.so ${MODARGS} NativeBridgeMain

diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 06cfd0a..8f9a32a 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java

@@ -631,7 +631,8 @@
   /// CHECK-DAG:  <<Array2>>     NullCheck [<<Get1>>]                        loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Len2:i\d+>>  ArrayLength [<<Array2>>]                    loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Bounds2>>    BoundsCheck [<<Index2:i\d+>>,<<Len2>>]      loop:<<InnerLoop>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop:B\d+>>
   /// CHECK-DAG:  <<Field1>>     StaticFieldGet                              loop:none
@@ -644,7 +645,8 @@
   /// CHECK-DAG:  <<Get1:l\d+>>  ArrayGet [<<Array1:l\d+>>,<<Index1:i\d+>>]  loop:<<OuterLoop>>
   //  Array reference ..[j] still in inner loop, with a direct index.
   /// CHECK-DAG:  <<Get2:i\d+>>  ArrayGet [<<Array2:l\d+>>,<<Index2:i\d+>>]  loop:<<InnerLoop:B\d+>>
-  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>]             loop:<<InnerLoop>>
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                 InvokeStaticOrDirect [<<Get2>>{{(,[ij]\d+)?}}] loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index2>>     Phi                                         loop:<<InnerLoop>>
   /// CHECK-DAG:  <<Index1>>     Phi                                         loop:<<OuterLoop>>
   //  Synthetic phi.

diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index f87326c..d647683 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java

@@ -664,19 +664,50 @@
     System.out.println("testFinalizableByForcingGc() failed to force gc.");
   }
 
-  public static void assertIntEquals(int expected, int result) {
+  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Select
+
+  // Test that HSelect creates an alias, so the store to obj.i must not be eliminated.
+  public static int testHSelect(boolean b) {
+    // Disable inlining.
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+
+    TestClass obj = new TestClass();
+    TestClass obj2 = null;
+    obj.i = 0xdead;
+    if (b) {
+      obj2 = obj;
+    }
+    return obj2.i;
+  }
+
+  public static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertFloatEquals(float expected, float result) {
+  public static void assertFloatEquals(float result, float expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertDoubleEquals(double expected, double result) {
+  public static void assertDoubleEquals(double result, double expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
@@ -723,5 +754,6 @@
     assertIntEquals(test23(false), 5);
     assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
+    assertIntEquals(testHSelect(true), 0xdead);
   }
 }

diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
index 8de0533..8b527b4 100644
--- a/test/562-bce-preheader/src/Main.java
+++ b/test/562-bce-preheader/src/Main.java

@@ -70,6 +70,26 @@
     return acc;
   }
 
+  /**
+   * An artificial example with an inconsistent phi structure during
+   * dynamic bce that is corrected afterwards. Note that only the last
+   * assignment is really live, but the other statements set up an
+   * interesting phi structure.
+   */
+  private static int doit(int[] z) {
+    int a = 0;
+    for (int i = 0; i < 10; ++i) {
+      for (int j = i; j < 10; ++j) {
+        a = z[i];
+        for (int k = 0; k < 10; ++k) {
+          a += z[k];
+          a = z[i];
+        }
+      }
+    }
+    return a;
+  }
+
   public static void main(String args[]) {
     int[][] x = new int[2][2];
     int y;
@@ -96,6 +116,9 @@
     expectEquals(26, foo(a, b,  2));
     expectEquals(38, foo(a, b,  3));
 
+    int[] z = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    expectEquals(10, doit(z));
+
     System.out.println("passed");
   }
 

diff --git a/test/565-checker-doublenegbitwise/expected.txt b/test/565-checker-doublenegbitwise/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/expected.txt


diff --git a/test/565-checker-doublenegbitwise/info.txt b/test/565-checker-doublenegbitwise/info.txt
new file mode 100644
index 0000000..cbe183c
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/info.txt

@@ -0,0 +1 @@
+Test double-negated bitwise operations simplifications.

diff --git a/test/565-checker-doublenegbitwise/src/Main.java b/test/565-checker-doublenegbitwise/src/Main.java
new file mode 100644
index 0000000..c51eda8
--- /dev/null
+++ b/test/565-checker-doublenegbitwise/src/Main.java

@@ -0,0 +1,211 @@
+/*
+* Copyright (C) 2016 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test transformation of Not/Not/And into Or/Not.
+   */
+
+  // Note: before the instruction_simplifier pass, Xor's are used instead of
+  // Not's (the simplification happens during the same pass).
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<And:i\d+>>         And [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<And>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Or:i\d+>>          Or [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<Or>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$andToOr(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        And
+
+  public static int $opt$noinline$andToOr(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a & ~b;
+  }
+
+  /**
+   * Test transformation of Not/Not/Or into And/Not.
+   */
+
+  // See note above.
+  // The second Xor has its arguments reversed for no obvious reason.
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (before)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:j\d+>>       LongConstant -1
+  /// CHECK:       <<Not1:j\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:j\d+>>        Xor [<<CstM1>>,<<P2>>]
+  /// CHECK:       <<Or:j\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:       <<P1:j\d+>>          ParameterValue
+  /// CHECK:       <<P2:j\d+>>          ParameterValue
+  /// CHECK:       <<And:j\d+>>         And [<<P1>>,<<P2>>]
+  /// CHECK:       <<Not:j\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: long Main.$opt$noinline$orToAnd(long, long) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  public static long $opt$noinline$orToAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    return ~a | ~b;
+  }
+
+  /**
+   * Test that the transformation copes with inputs being separated from the
+   * bitwise operations.
+   * This is a regression test. The initial logic was inserting the new bitwise
+   * operation incorrectly.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK-DAG:   <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:   <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<AddP1>>,<<CstM1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<AddP2>>,<<CstM1>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Cst1:i\d+>>        IntConstant 1
+  /// CHECK:       <<AddP1:i\d+>>       Add [<<P1>>,<<Cst1>>]
+  /// CHECK:       <<AddP2:i\d+>>       Add [<<P2>>,<<Cst1>>]
+  /// CHECK:       <<And:i\d+>>         And [<<AddP1>>,<<AddP2>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<And>>]
+  /// CHECK:                            Return [<<Not>>]
+
+  /// CHECK-START: int Main.$opt$noinline$regressInputsAway(int, int) instruction_simplifier (after)
+  /// CHECK:                            Not
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$regressInputsAway(int a, int b) {
+    if (doThrow) throw new Error();
+    int a1 = a + 1;
+    int not_a1 = ~a1;
+    int b1 = b + 1;
+    int not_b1 = ~b1;
+    return not_a1 | not_b1;
+  }
+
+  /**
+   * Test transformation of Not/Not/Xor into Xor.
+   */
+
+  // See first note above.
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<Not1>>,<<Not2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<Xor:i\d+>>         Xor [<<P1>>,<<P2>>]
+  /// CHECK:                            Return [<<Xor>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notXorToXor(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Not
+
+  public static int $opt$noinline$notXorToXor(int a, int b) {
+    if (doThrow) throw new Error();
+    return ~a ^ ~b;
+  }
+
+  /**
+   * Check that no transformation is done when one Not has multiple uses.
+   */
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (before)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<CstM1:i\d+>>       IntConstant -1
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Xor [<<P2>>,<<CstM1>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Xor [<<P1>>,<<CstM1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK:       <<P1:i\d+>>          ParameterValue
+  /// CHECK:       <<P2:i\d+>>          ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant 1
+  /// CHECK:       <<Not2:i\d+>>        Not [<<P2>>]
+  /// CHECK:       <<And2:i\d+>>        And [<<Not2>>,<<One>>]
+  /// CHECK:       <<Not1:i\d+>>        Not [<<P1>>]
+  /// CHECK:       <<And1:i\d+>>        And [<<Not1>>,<<Not2>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<And2>>,<<And1>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START: int Main.$opt$noinline$notMultipleUses(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:                        Or
+
+  public static int $opt$noinline$notMultipleUses(int a, int b) {
+    if (doThrow) throw new Error();
+    int tmp = ~b;
+    return (tmp & 0x1) + (~a & tmp);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(~0xff, $opt$noinline$andToOr(0xf, 0xff));
+    assertLongEquals(~0xf, $opt$noinline$orToAnd(0xf, 0xff));
+    assertIntEquals(0xf0, $opt$noinline$notXorToXor(0xf, 0xff));
+    assertIntEquals(~0xff, $opt$noinline$notMultipleUses(0xf, 0xff));
+  }
+}

diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 9a85c81..e2d451c 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java

@@ -39,7 +39,8 @@
     /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
     /// CHECK-DAG:  <<Ignored:i\d+>>    IntConstant 77
     /// CHECK-DAG:  <<ClinitCk:l\d+>>   ClinitCheck
-    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>,<<ClinitCk>>]
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:  <<Invoke:l\d+>>     InvokeStaticOrDirect [<<Ignored>>,<<Value>>{{(,[ij]\d+)?}},<<ClinitCk>>]
     /// CHECK-DAG:                      Return [<<Invoke>>]
 
     /// CHECK-START: java.lang.Object Main.staticReturnArg2(java.lang.String) inliner (after)
@@ -313,7 +314,8 @@
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (before)
     /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
-    /// CHECK-DAG:              InvokeStaticOrDirect [<<Obj>>] method_name:java.lang.Object.<init>
+    // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+    /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>{{(,[ij]\d+)?}}] method_name:java.lang.Object.<init>
 
     /// CHECK-START: java.lang.Object Main.newObject() inliner (after)
     /// CHECK-NOT:                      InvokeStaticOrDirect

diff --git a/test/570-checker-select/expected.txt b/test/570-checker-select/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-select/expected.txt


diff --git a/test/570-checker-select/info.txt b/test/570-checker-select/info.txt
new file mode 100644
index 0000000..6d49532
--- /dev/null
+++ b/test/570-checker-select/info.txt

@@ -0,0 +1 @@
+Tests for HSelect code generation.

diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
new file mode 100644
index 0000000..2f8094d
--- /dev/null
+++ b/test/570-checker-select/src/Main.java

@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntVarCst(boolean cond, int x) {
+    return cond ? x : 1;
+  }
+
+  /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
+  /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+
+  public static int BoolCond_IntCstVar(boolean cond, int y) {
+    return cond ? 1 : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
+    return cond ? x : y;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatVarCst(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatVarCst(boolean cond, float x) {
+    return cond ? x : 1.0f;
+  }
+
+  /// CHECK-START: float Main.BoolCond_FloatCstVar(boolean, float) register (after)
+  /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
+
+  public static float BoolCond_FloatCstVar(boolean cond, float y) {
+    return cond ? 1.0f : y;
+  }
+
+  /// CHECK-START: int Main.IntNonmatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.IntMatCond_IntVarVar(int, int, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a > b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+
+  public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:                     Select [{{f\d+}},{{f\d+}},<<Cond>>]
+
+  public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
+    return a < b ? x : y;
+  }
+
+  /// CHECK-START: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a > b ? x : y);
+    return result + (a > b ? 0 : 1);
+  }
+
+  /// CHECK-START: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
+  /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
+
+  public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
+    int result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  /// CHECK-START: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) register (after)
+  /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual
+  /// CHECK-NEXT:       <<Sel:f\d+>>  Select [{{f\d+}},{{f\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     TypeConversion [<<Cond>>]
+
+  public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
+    float result = (a < b ? x : y);
+    return result + (a < b ? 0 : 1);
+  }
+
+  public static void assertEqual(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void assertEqual(float expected, float actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEqual(5, BoolCond_IntVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_IntVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_IntVarCst(true, 5));
+    assertEqual(1, BoolCond_IntVarCst(false, 5));
+    assertEqual(1, BoolCond_IntCstVar(true, 7));
+    assertEqual(7, BoolCond_IntCstVar(false, 7));
+
+    assertEqual(5, BoolCond_FloatVarVar(true, 5, 7));
+    assertEqual(7, BoolCond_FloatVarVar(false, 5, 7));
+    assertEqual(5, BoolCond_FloatVarCst(true, 5));
+    assertEqual(1, BoolCond_FloatVarCst(false, 5));
+    assertEqual(1, BoolCond_FloatCstVar(true, 7));
+    assertEqual(7, BoolCond_FloatCstVar(false, 7));
+
+    assertEqual(5, IntNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, IntNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
+
+    assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatLtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtNonmatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(7, FloatGtNonmatCond_FloatVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatLtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatLtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_IntVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_IntVarVar(2, Float.NaN, 5, 7));
+
+    assertEqual(5, FloatGtMatCond_FloatVarVar(2, 3, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(3, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(Float.NaN, 2, 5, 7));
+    assertEqual(8, FloatGtMatCond_FloatVarVar(2, Float.NaN, 5, 7));
+  }
+}

diff --git a/test/571-irreducible-loop/expected.txt b/test/571-irreducible-loop/expected.txt
new file mode 100644
index 0000000..3a71184
--- /dev/null
+++ b/test/571-irreducible-loop/expected.txt

@@ -0,0 +1 @@
+5.9E-44

diff --git a/test/571-irreducible-loop/info.txt b/test/571-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/571-irreducible-loop/info.txt

@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+an irreducible loop.

diff --git a/test/571-irreducible-loop/smali/IrreducibleLoop.smali b/test/571-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..737a18b
--- /dev/null
+++ b/test/571-irreducible-loop/smali/IrreducibleLoop.smali

@@ -0,0 +1,47 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Check that on x86 we don't crash because irreducible loops
+# disabled the constant pool optimization.
+.method public static test1(IF)F
+   .registers 5
+   const/16 v0, 1
+   const/16 v1, 42
+
+   if-nez p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-eqz p0, :exit
+   add-float v2, p1, v1
+   sub-float v2, v2, v1
+   div-float v2, v2, v1
+   mul-float v2, v2, v1
+   :other_loop_entry
+   sub-int p0, p0, v0
+   goto :loop_entry
+
+   # The other block branching to the irreducible loop.
+   # In that block, v2 has no live range (v4 is p1, which stays live).
+   :other_loop_pre_entry
+   goto :other_loop_entry
+
+   :exit
+   return v1
+.end method

diff --git a/test/571-irreducible-loop/src/Main.java b/test/571-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..ff22f67
--- /dev/null
+++ b/test/571-irreducible-loop/src/Main.java

@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test1", int.class, float.class);
+    Object[] arguments = { 42, 31.0f };
+    System.out.println(m.invoke(null, arguments));
+  }
+}

diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9296621..a8938fa 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk

@@ -459,10 +459,7 @@
 
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
-    441-checker-inliner \
     510-checker-try-catch \
-    536-checker-intrinsic-optimization \
-    557-checker-instruction-simplifier-ror \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -477,7 +474,6 @@
 
 # Known broken tests for the mips64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
-    557-checker-instruction-simplifier-ror \
 
 ifeq (mips64,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -604,18 +600,6 @@
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 
-# Tests broken by multi-image.
-TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS := \
-  476-checker-ctor-memory-barrier \
-  530-checker-lse
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), \
-    $(TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS),  $(ALL_ADDRESS_SIZES))
-
-TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS :=
-
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \

diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 95d1292..d8ef9ba 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt

@@ -27,7 +27,8 @@
   description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
   result: EXEC_FAILED,
   modes: [host],
-  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
+  names: ["libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushEnabled",
+          "libcore.java.util.zip.DeflaterOutputStreamTest#testSyncFlushDisabled",
           "libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
           "libcore.java.util.zip.OldAndroidGZIPStreamTest#testGZIPStream",
           "libcore.java.util.zip.OldAndroidZipStreamTest#testZipStream",
@@ -40,7 +41,8 @@
   result: EXEC_FAILED,
   modes: [device],
   names: ["libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
-          "libcore.java.text.SimpleDateFormatTest#testLocales"],
+          "libcore.java.text.SimpleDateFormatTest#testLocales",
+          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries"],
   bug: 26711853
 }
 ]
commit	bd707ab60fa33ec1ba921ee0c7d3b24d70f00a54	[log] [tgz]
author	Igor Murashkin <iam@google.com>	Thu Feb 04 23:30:13 2016 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Thu Feb 04 23:30:13 2016 +0000
tree	445bb30d2a2dfeede4653d10bbec8017e203cb5b
parent	a4e981265dd57adbe755e12a814c0f30ae073c2f [diff]
parent	8d1da85822f254f102dfec2903ca7aa0064444af [diff]