Merge "ART: Rewrite stub-test inline assembly"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 9775f6a..70c9dc1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -348,6 +348,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES_mips := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
+  compiler/utils/mips/assembler_mips_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 96e13ac..8e3b555 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -210,7 +210,8 @@
   dex/quick/arm64/arm64_lir.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \
-  dex/quick/mips/mips_lir.h
+  dex/quick/mips/mips_lir.h \
+  utils/mips/assembler_mips.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
   $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 8b5fdc3..5af2242 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -325,84 +325,73 @@
 // 0x0000007f: .cfi_def_cfa_offset: 128
 
 static constexpr uint8_t expected_asm_kMips[] = {
-    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF,
-    0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF,
-    0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF,
-    0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF,
-    0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7, 0x4C, 0x00, 0xA6, 0xAF,
-    0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27,
-    0x18, 0x00, 0xA8, 0x8F, 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F,
-    0x24, 0x00, 0xAB, 0x8F, 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F,
-    0x30, 0x00, 0xAE, 0x8F, 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F,
-    0x3C, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03,
-    0x00, 0x00, 0x00, 0x00,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xBE, 0xAF,
+    0x34, 0x00, 0xB7, 0xAF, 0x30, 0x00, 0xB6, 0xAF, 0x2C, 0x00, 0xB5, 0xAF,
+    0x28, 0x00, 0xB4, 0xAF, 0x24, 0x00, 0xB3, 0xAF, 0x20, 0x00, 0xB2, 0xAF,
+    0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7,
+    0x4C, 0x00, 0xA6, 0xAF, 0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27,
+    0x20, 0x00, 0xBD, 0x27, 0x20, 0x00, 0xB2, 0x8F, 0x24, 0x00, 0xB3, 0x8F,
+    0x28, 0x00, 0xB4, 0x8F, 0x2C, 0x00, 0xB5, 0x8F, 0x30, 0x00, 0xB6, 0x8F,
+    0x34, 0x00, 0xB7, 0x8F, 0x38, 0x00, 0xBE, 0x8F, 0x3C, 0x00, 0xBF, 0x8F,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03,
-    0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07,
-    0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x58, 0x0E, 0x60,
-    0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB,
-    0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF,
-    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x97, 0x03,
+    0x44, 0x96, 0x04, 0x44, 0x95, 0x05, 0x44, 0x94, 0x06, 0x44, 0x93, 0x07,
+    0x44, 0x92, 0x08, 0x58, 0x0E, 0x60, 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xD2,
+    0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, 0x44, 0xD7, 0x44, 0xDE,
+    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: addiu r29, r29, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
 // 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r24, +56(r29)
-// 0x0000000c: .cfi_offset: r24 at cfa-8
-// 0x0000000c: sw r15, +52(r29)
-// 0x00000010: .cfi_offset: r15 at cfa-12
-// 0x00000010: sw r14, +48(r29)
-// 0x00000014: .cfi_offset: r14 at cfa-16
-// 0x00000014: sw r13, +44(r29)
-// 0x00000018: .cfi_offset: r13 at cfa-20
-// 0x00000018: sw r12, +40(r29)
-// 0x0000001c: .cfi_offset: r12 at cfa-24
-// 0x0000001c: sw r11, +36(r29)
-// 0x00000020: .cfi_offset: r11 at cfa-28
-// 0x00000020: sw r10, +32(r29)
-// 0x00000024: .cfi_offset: r10 at cfa-32
-// 0x00000024: sw r9, +28(r29)
-// 0x00000028: .cfi_offset: r9 at cfa-36
-// 0x00000028: sw r8, +24(r29)
-// 0x0000002c: .cfi_offset: r8 at cfa-40
-// 0x0000002c: sw r4, +0(r29)
-// 0x00000030: sw r5, +68(r29)
-// 0x00000034: swc1 f12, +72(r29)
-// 0x00000038: sw r6, +76(r29)
-// 0x0000003c: sw r7, +80(r29)
-// 0x00000040: addiu r29, r29, -32
-// 0x00000044: .cfi_def_cfa_offset: 96
-// 0x00000044: addiu r29, r29, 32
-// 0x00000048: .cfi_def_cfa_offset: 64
-// 0x00000048: .cfi_remember_state
-// 0x00000048: lw r8, +24(r29)
-// 0x0000004c: .cfi_restore: r8
-// 0x0000004c: lw r9, +28(r29)
-// 0x00000050: .cfi_restore: r9
-// 0x00000050: lw r10, +32(r29)
-// 0x00000054: .cfi_restore: r10
-// 0x00000054: lw r11, +36(r29)
-// 0x00000058: .cfi_restore: r11
-// 0x00000058: lw r12, +40(r29)
-// 0x0000005c: .cfi_restore: r12
-// 0x0000005c: lw r13, +44(r29)
-// 0x00000060: .cfi_restore: r13
-// 0x00000060: lw r14, +48(r29)
-// 0x00000064: .cfi_restore: r14
-// 0x00000064: lw r15, +52(r29)
-// 0x00000068: .cfi_restore: r15
-// 0x00000068: lw r24, +56(r29)
-// 0x0000006c: .cfi_restore: r24
-// 0x0000006c: lw r31, +60(r29)
-// 0x00000070: .cfi_restore: r31
-// 0x00000070: addiu r29, r29, 64
-// 0x00000074: .cfi_def_cfa_offset: 0
-// 0x00000074: jr r31
-// 0x00000078: nop
-// 0x0000007c: .cfi_restore_state
-// 0x0000007c: .cfi_def_cfa_offset: 64
+// 0x00000008: sw r30, +56(r29)
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: sw r23, +52(r29)
+// 0x00000010: .cfi_offset: r23 at cfa-12
+// 0x00000010: sw r22, +48(r29)
+// 0x00000014: .cfi_offset: r22 at cfa-16
+// 0x00000014: sw r21, +44(r29)
+// 0x00000018: .cfi_offset: r21 at cfa-20
+// 0x00000018: sw r20, +40(r29)
+// 0x0000001c: .cfi_offset: r20 at cfa-24
+// 0x0000001c: sw r19, +36(r29)
+// 0x00000020: .cfi_offset: r19 at cfa-28
+// 0x00000020: sw r18, +32(r29)
+// 0x00000024: .cfi_offset: r18 at cfa-32
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: sw r5, +68(r29)
+// 0x0000002c: swc1 f12, +72(r29)
+// 0x00000030: sw r6, +76(r29)
+// 0x00000034: sw r7, +80(r29)
+// 0x00000038: addiu r29, r29, -32
+// 0x0000003c: .cfi_def_cfa_offset: 96
+// 0x0000003c: addiu r29, r29, 32
+// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000040: .cfi_remember_state
+// 0x00000040: lw r18, +32(r29)
+// 0x00000044: .cfi_restore: r18
+// 0x00000044: lw r19, +36(r29)
+// 0x00000048: .cfi_restore: r19
+// 0x00000048: lw r20, +40(r29)
+// 0x0000004c: .cfi_restore: r20
+// 0x0000004c: lw r21, +44(r29)
+// 0x00000050: .cfi_restore: r21
+// 0x00000050: lw r22, +48(r29)
+// 0x00000054: .cfi_restore: r22
+// 0x00000054: lw r23, +52(r29)
+// 0x00000058: .cfi_restore: r23
+// 0x00000058: lw r30, +56(r29)
+// 0x0000005c: .cfi_restore: r30
+// 0x0000005c: lw r31, +60(r29)
+// 0x00000060: .cfi_restore: r31
+// 0x00000060: addiu r29, r29, 64
+// 0x00000064: .cfi_def_cfa_offset: 0
+// 0x00000064: jr r31
+// 0x00000068: nop
+// 0x0000006c: .cfi_restore_state
+// 0x0000006c: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
     0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF,
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 953dfcb..34f0802 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -67,6 +67,7 @@
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
+  const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
@@ -93,7 +94,7 @@
       JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetGenerateDebugInfo());
 
   // Offsets into data structures
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index be2397f..ecf143d 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -162,22 +162,19 @@
   }
   padding_ = padding;
 
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T0));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T1));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T8));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
   uint32_t result = 0;
-  result = 1 << T0 | 1 << T1 | 1 << T2 | 1 << T3 | 1 << T4 | 1 << T5 | 1 << T6 |
-           1 << T7 | 1 << T8 | 1 << RA;
+  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
   return result;
 }
 
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 496ca95..b01b0fe 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -122,7 +122,8 @@
   this->AdvancePC(assembler_->CodeSize());
 }
 
-Assembler* Assembler::Create(InstructionSet instruction_set) {
+Assembler* Assembler::Create(InstructionSet instruction_set,
+                             const InstructionSetFeatures* instruction_set_features) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
@@ -136,7 +137,9 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsAssembler();
+      return new mips::MipsAssembler(instruction_set_features != nullptr
+                                         ? instruction_set_features->AsMipsInstructionSetFeatures()
+                                         : nullptr);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 1088cb1..f1c0b92 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "arm/constants_arm.h"
@@ -284,7 +285,8 @@
 
 class Assembler {
  public:
-  static Assembler* Create(InstructionSet instruction_set);
+  static Assembler* Create(InstructionSet instruction_set,
+                           const InstructionSetFeatures* instruction_set_features = nullptr);
 
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index bd994f4..b30f7d7 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -129,13 +129,14 @@
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
-  template <typename Reg1Type, typename Reg2Type, typename ImmType,
-            RegisterView Reg1View, RegisterView Reg2View>
-  std::string RepeatRegRegImmBits(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
-                                  int imm_bits,
-                                  std::string fmt) {
-    const std::vector<Reg1Type*> reg1_registers = GetRegisters();
-    const std::vector<Reg2Type*> reg2_registers = GetRegisters();
+  template <typename Reg1, typename Reg2, typename ImmType>
+  std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType),
+                                              int imm_bits,
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              std::string fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
 
@@ -146,13 +147,13 @@
           (assembler_.get()->*f)(*reg1, *reg2, new_imm);
           std::string base = fmt;
 
-          std::string reg1_string = GetRegName<Reg1View>(*reg1);
+          std::string reg1_string = (this->*GetName1)(*reg1);
           size_t reg1_index;
           while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
             base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
           }
 
-          std::string reg2_string = GetRegName<Reg2View>(*reg2);
+          std::string reg2_string = (this->*GetName2)(*reg2);
           size_t reg2_index;
           while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
             base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
@@ -178,15 +179,75 @@
     return str;
   }
 
-  template <typename Reg1Type, typename Reg2Type, typename ImmType>
-  std::string RepeatRRIb(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
-                         int imm_bits,
-                         std::string fmt) {
-    return RepeatRegRegImmBits<Reg1Type,
-                               Reg2Type,
-                               ImmType,
-                               RegisterView::kUsePrimaryName,
-                               RegisterView::kUsePrimaryName>(f, imm_bits, fmt);
+  template <typename RegType, typename ImmType>
+  std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
+                                              int imm_bits,
+                                              const std::vector<Reg*> registers,
+                                              std::string (AssemblerTest::*GetName)(const RegType&),
+                                              std::string fmt) {
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+
+    for (auto reg : registers) {
+      for (int64_t imm : imms) {
+        ImmType new_imm = CreateImmediate(imm);
+        (assembler_.get()->*f)(*reg, new_imm);
+        std::string base = fmt;
+
+        std::string reg_string = (this->*GetName)(*reg);
+        size_t reg_index;
+        while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
+          base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
+        }
+
+        size_t imm_index = base.find(IMM_TOKEN);
+        if (imm_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << imm;
+          std::string imm_string = sreg.str();
+          base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  template <typename ImmType>
+  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f,
+        imm_bits,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f,
+        imm_bits,
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f,
+        imm_bits,
+        GetFPRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
   }
 
   std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index ff4a1a4..1038f44 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -70,6 +70,13 @@
  public:
   Label() : position_(0) {}
 
+  Label(Label&& src)
+      : position_(src.position_) {
+    // We must unlink/unbind the src label when moving; if not, calling the destructor on
+    // the src label would fail.
+    src.position_ = 0;
+  }
+
   ~Label() {
     // Assert if label is being destroyed with unresolved branches pending.
     CHECK(!IsLinked());
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index c5fae92..6f35e9e 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -19,6 +19,7 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "memory_region.h"
 #include "thread.h"
 
@@ -34,172 +35,193 @@
   return os;
 }
 
-void MipsAssembler::Emit(int32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<int32_t>(value);
+void MipsAssembler::FinalizeCode() {
+  for (auto& exception_block : exception_blocks_) {
+    EmitExceptionPoll(&exception_block);
+  }
+  PromoteBranches();
+}
+
+void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();
+  Assembler::FinalizeInstructions(region);
+}
+
+void MipsAssembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // Switch from appending instructions at the end of the buffer to overwriting
+  // existing instructions (branch placeholders) in the buffer.
+  overwriting_ = true;
+  for (auto& branch : branches_) {
+    EmitBranch(&branch);
+  }
+  overwriting_ = false;
+}
+
+void MipsAssembler::Emit(uint32_t value) {
+  if (overwriting_) {
+    // Branches to labels are emitted into their placeholders here.
+    buffer_.Store<uint32_t>(overwrite_location_, value);
+    overwrite_location_ += sizeof(uint32_t);
+  } else {
+    // Other instructions are simply appended at the end here.
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    buffer_.Emit<uint32_t>(value);
+  }
 }
 
 void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct) {
   CHECK_NE(rs, kNoRegister);
   CHECK_NE(rt, kNoRegister);
   CHECK_NE(rd, kNoRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     shamt << kShamtShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      static_cast<uint32_t>(rd) << kRdShift |
+                      shamt << kShamtShift |
+                      funct;
   Emit(encoding);
 }
 
 void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) {
   CHECK_NE(rs, kNoRegister);
   CHECK_NE(rt, kNoRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      imm;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitJ(int opcode, int address) {
-  int32_t encoding = opcode << kOpcodeShift |
-                     address;
+void MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) {
+  CHECK_NE(rs, kNoRegister);
+  CHECK(IsUint<21>(imm21)) << imm21;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      imm21;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct) {
+void MipsAssembler::EmitI26(int opcode, uint32_t imm26) {
+  CHECK(IsUint<26>(imm26)) << imm26;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
+  Emit(encoding);
+}
+
+void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd,
+                           int funct) {
   CHECK_NE(ft, kNoFRegister);
   CHECK_NE(fs, kNoFRegister);
   CHECK_NE(fd, kNoFRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(ft) << kFtShift |
-                     static_cast<int32_t>(fs) << kFsShift |
-                     static_cast<int32_t>(fd) << kFdShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      static_cast<uint32_t>(fs) << kFsShift |
+                      static_cast<uint32_t>(fd) << kFdShift |
+                      funct;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm) {
-  CHECK_NE(rt, kNoFRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+void MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) {
+  CHECK_NE(ft, kNoFRegister);
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      imm;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitBranch(Register rt, Register rs, Label* label, bool equal) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the branch instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (equal) {
-    Beq(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  } else {
-    Bne(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  }
-}
-
-void MipsAssembler::EmitJump(Label* label, bool link) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the jump instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (link) {
-    Jal((offset >> 2) & kJumpOffsetMask);
-  } else {
-    J((offset >> 2) & kJumpOffsetMask);
-  }
-}
-
-int32_t MipsAssembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
-  CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
-
-  // Properly preserve only the bits supported in the instruction.
-  offset >>= 2;
-  if (is_jump) {
-    offset &= kJumpOffsetMask;
-    return (inst & ~kJumpOffsetMask) | offset;
-  } else {
-    offset &= kBranchOffsetMask;
-    return (inst & ~kBranchOffsetMask) | offset;
-  }
-}
-
-int MipsAssembler::DecodeBranchOffset(int32_t inst, bool is_jump) {
-  // Sign-extend, then left-shift by 2.
-  if (is_jump) {
-    return (((inst & kJumpOffsetMask) << 6) >> 4);
-  } else {
-    return (((inst & kBranchOffsetMask) << 16) >> 14);
-  }
-}
-
-void MipsAssembler::Bind(Label* label, bool is_jump) {
-  CHECK(!label->IsBound());
-  int bound_pc = buffer_.Size();
-  while (label->IsLinked()) {
-    int32_t position = label->Position();
-    int32_t next = buffer_.Load<int32_t>(position);
-    int32_t offset = is_jump ? bound_pc - position : bound_pc - position - 4;
-    int32_t encoded = MipsAssembler::EncodeBranchOffset(offset, next, is_jump);
-    buffer_.Store<int32_t>(position, encoded);
-    label->position_ = MipsAssembler::DecodeBranchOffset(next, is_jump);
-  }
-  label->BindTo(bound_pc);
-}
-
-void MipsAssembler::Add(Register rd, Register rs, Register rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
 void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
 
-void MipsAssembler::Addi(Register rt, Register rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x9, rs, rt, imm16);
 }
 
-void MipsAssembler::Sub(Register rd, Register rs, Register rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
 
-void MipsAssembler::Mult(Register rs, Register rt) {
+void MipsAssembler::MultR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18);
 }
 
-void MipsAssembler::Multu(Register rs, Register rt) {
+void MipsAssembler::MultuR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19);
 }
 
-void MipsAssembler::Div(Register rs, Register rt) {
+void MipsAssembler::DivR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a);
 }
 
-void MipsAssembler::Divu(Register rs, Register rt) {
+void MipsAssembler::DivuR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b);
 }
 
+void MipsAssembler::MulR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0x1c, rs, rt, rd, 0, 2);
+}
+
+void MipsAssembler::DivR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivR2(rs, rt);
+  Mflo(rd);
+}
+
+void MipsAssembler::ModR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivR2(rs, rt);
+  Mfhi(rd);
+}
+
+void MipsAssembler::DivuR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivuR2(rs, rt);
+  Mflo(rd);
+}
+
+void MipsAssembler::ModuR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivuR2(rs, rt);
+  Mfhi(rd);
+}
+
+void MipsAssembler::MulR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x18);
+}
+
+void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x19);
+}
+
+void MipsAssembler::DivR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x1a);
+}
+
+void MipsAssembler::ModR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x1a);
+}
+
+void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x1b);
+}
+
+void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x1b);
+}
+
 void MipsAssembler::And(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x24);
 }
@@ -228,27 +250,35 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
-void MipsAssembler::Sll(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x00);
+void MipsAssembler::Seb(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
 }
 
-void MipsAssembler::Srl(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x02);
+void MipsAssembler::Seh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
-void MipsAssembler::Sra(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x03);
+void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
-void MipsAssembler::Sllv(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
+}
+
+void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
+}
+
+void MipsAssembler::Sllv(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x04);
 }
 
-void MipsAssembler::Srlv(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srlv(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
-void MipsAssembler::Srav(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
@@ -276,11 +306,18 @@
   EmitI(0xf, static_cast<Register>(0), rt, imm16);
 }
 
+void MipsAssembler::Sync(uint32_t stype) {
+  EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0),
+        stype & 0x1f, 0xf);
+}
+
 void MipsAssembler::Mfhi(Register rd) {
+  CHECK(!IsR6());
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x10);
 }
 
 void MipsAssembler::Mflo(Register rd) {
+  CHECK(!IsR6());
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x12);
 }
 
@@ -312,34 +349,276 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void MipsAssembler::Beq(Register rt, Register rs, uint16_t imm16) {
+void MipsAssembler::B(uint16_t imm16) {
+  EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
-  Nop();
 }
 
-void MipsAssembler::Bne(Register rt, Register rs, uint16_t imm16) {
+void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x5, rs, rt, imm16);
-  Nop();
 }
 
-void MipsAssembler::J(uint32_t address) {
-  EmitJ(0x2, address);
-  Nop();
+void MipsAssembler::Beqz(Register rt, uint16_t imm16) {
+  Beq(ZERO, rt, imm16);
 }
 
-void MipsAssembler::Jal(uint32_t address) {
-  EmitJ(0x2, address);
-  Nop();
+void MipsAssembler::Bnez(Register rt, uint16_t imm16) {
+  Bne(ZERO, rt, imm16);
 }
 
-void MipsAssembler::Jr(Register rs) {
-  EmitR(0, rs, static_cast<Register>(0), static_cast<Register>(0), 0, 0x09);  // Jalr zero, rs
-  Nop();
+void MipsAssembler::Bltz(Register rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Bgez(Register rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<Register>(0x1), imm16);
+}
+
+void MipsAssembler::Blez(Register rt, uint16_t imm16) {
+  EmitI(0x6, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Bgtz(Register rt, uint16_t imm16) {
+  EmitI(0x7, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::J(uint32_t addr26) {
+  EmitI26(0x2, addr26);
+}
+
+void MipsAssembler::Jal(uint32_t addr26) {
+  EmitI26(0x3, addr26);
+}
+
+void MipsAssembler::Jalr(Register rd, Register rs) {
+  EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09);
 }
 
 void MipsAssembler::Jalr(Register rs) {
-  EmitR(0, rs, static_cast<Register>(0), RA, 0, 0x09);
-  Nop();
+  Jalr(RA, rs);
+}
+
+void MipsAssembler::Jr(Register rs) {
+  Jalr(ZERO, rs);
+}
+
+void MipsAssembler::Nal() {
+  EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0);
+}
+
+void MipsAssembler::Auipc(Register rs, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16);
+}
+
+void MipsAssembler::Addiupc(Register rs, uint32_t imm19) {
+  CHECK(IsR6());
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, imm19);
+}
+
+void MipsAssembler::Bc(uint32_t imm26) {
+  CHECK(IsR6());
+  EmitI26(0x32, imm26);
+}
+
+void MipsAssembler::Jic(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x36, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Jialc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x3E, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x17, rs, rt, imm16);
+}
+
+void MipsAssembler::Bltzc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, rt, rt, imm16);
+}
+
+void MipsAssembler::Bgtzc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x16, rs, rt, imm16);
+}
+
+void MipsAssembler::Bgezc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, rt, rt, imm16);
+}
+
+void MipsAssembler::Blezc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x7, rs, rt, imm16);
+}
+
+void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x6, rs, rt, imm16);
+}
+
+void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
+}
+
+void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
+}
+
+void MipsAssembler::Beqzc(Register rs, uint32_t imm21) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x36, rs, imm21);
+}
+
+void MipsAssembler::Bnezc(Register rs, uint32_t imm21) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x3E, rs, imm21);
+}
+
+void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
+  switch (cond) {
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltz(rs, imm16);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgez(rs, imm16);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blez(rs, imm16);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtz(rs, imm16);
+      break;
+    case kCondEQ:
+      Beq(rs, rt, imm16);
+      break;
+    case kCondNE:
+      Bne(rs, rt, imm16);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqz(rs, imm16);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnez(rs, imm16);
+      break;
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+    case kUncond:
+      // We don't support synthetic R2 branches (preceded with slt[u]) at this level
+      // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >).
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
+
+void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
+  switch (cond) {
+    case kCondLT:
+      Bltc(rs, rt, imm16_21);
+      break;
+    case kCondGE:
+      Bgec(rs, rt, imm16_21);
+      break;
+    case kCondLE:
+      Bgec(rt, rs, imm16_21);
+      break;
+    case kCondGT:
+      Bltc(rt, rs, imm16_21);
+      break;
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltzc(rs, imm16_21);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgezc(rs, imm16_21);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blezc(rs, imm16_21);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtzc(rs, imm16_21);
+      break;
+    case kCondEQ:
+      Beqc(rs, rt, imm16_21);
+      break;
+    case kCondNE:
+      Bnec(rs, rt, imm16_21);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqzc(rs, imm16_21);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnezc(rs, imm16_21);
+      break;
+    case kCondLTU:
+      Bltuc(rs, rt, imm16_21);
+      break;
+    case kCondGEU:
+      Bgeuc(rs, rt, imm16_21);
+      break;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
 }
 
 void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) {
@@ -358,52 +637,84 @@
   EmitFR(0x11, 0x10, ft, fs, fd, 0x3);
 }
 
-void MipsAssembler::AddD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x0);
+void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x0);
 }
 
-void MipsAssembler::SubD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x1);
+void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x1);
 }
 
-void MipsAssembler::MulD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x2);
+void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x2);
 }
 
-void MipsAssembler::DivD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x3);
+void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x3);
 }
 
 void MipsAssembler::MovS(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6);
 }
 
-void MipsAssembler::MovD(DRegister fd, DRegister fs) {
-  EmitFR(0x11, 0x11, static_cast<FRegister>(0), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x6);
+void MipsAssembler::MovD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6);
+}
+
+void MipsAssembler::NegS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7);
+}
+
+void MipsAssembler::NegD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
+}
+
+void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
+void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtds(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
 }
 
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
-  EmitFR(0x11, 0x00, ConvertRegToFReg(rt), fs, static_cast<FRegister>(0), 0x0);
+  EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
-void MipsAssembler::Mtc1(FRegister ft, Register rs) {
-  EmitFR(0x11, 0x04, ft, ConvertRegToFReg(rs), static_cast<FRegister>(0), 0x0);
+void MipsAssembler::Mtc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+}
+
+void MipsAssembler::Mfhc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+}
+
+void MipsAssembler::Mthc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x31, rs, ConvertFRegToReg(ft), imm16);
+  EmitI(0x31, rs, static_cast<Register>(ft), imm16);
 }
 
-void MipsAssembler::Ldc1(DRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x35, rs, ConvertDRegToReg(ft), imm16);
+void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) {
+  EmitI(0x35, rs, static_cast<Register>(ft), imm16);
 }
 
 void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x39, rs, ConvertFRegToReg(ft), imm16);
+  EmitI(0x39, rs, static_cast<Register>(ft), imm16);
 }
 
-void MipsAssembler::Sdc1(DRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x3d, rs, ConvertDRegToReg(ft), imm16);
+void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) {
+  EmitI(0x3d, rs, static_cast<Register>(ft), imm16);
 }
 
 void MipsAssembler::Break() {
@@ -415,63 +726,881 @@
   EmitR(0x0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), 0, 0x0);
 }
 
-void MipsAssembler::Move(Register rt, Register rs) {
-  EmitI(0x9, rs, rt, 0);    // Addiu
+void MipsAssembler::Move(Register rd, Register rs) {
+  Or(rd, rs, ZERO);
 }
 
-void MipsAssembler::Clear(Register rt) {
-  EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rt, 0, 0x20);
+void MipsAssembler::Clear(Register rd) {
+  Move(rd, ZERO);
 }
 
-void MipsAssembler::Not(Register rt, Register rs) {
-  EmitR(0, static_cast<Register>(0), rs, rt, 0, 0x27);
+void MipsAssembler::Not(Register rd, Register rs) {
+  Nor(rd, rs, ZERO);
 }
 
-void MipsAssembler::Mul(Register rd, Register rs, Register rt) {
-  Mult(rs, rt);
-  Mflo(rd);
+void MipsAssembler::Push(Register rs) {
+  IncreaseFrameSize(kMipsWordSize);
+  Sw(rs, SP, 0);
 }
 
-void MipsAssembler::Div(Register rd, Register rs, Register rt) {
-  Div(rs, rt);
-  Mflo(rd);
+void MipsAssembler::Pop(Register rd) {
+  Lw(rd, SP, 0);
+  DecreaseFrameSize(kMipsWordSize);
 }
 
-void MipsAssembler::Rem(Register rd, Register rs, Register rt) {
-  Div(rs, rt);
-  Mfhi(rd);
+void MipsAssembler::PopAndReturn(Register rd, Register rt) {
+  Lw(rd, SP, 0);
+  Jr(rt);
+  DecreaseFrameSize(kMipsWordSize);
 }
 
-void MipsAssembler::AddConstant(Register rt, Register rs, int32_t value) {
-  Addiu(rt, rs, value);
-}
-
-void MipsAssembler::LoadImmediate(Register rt, int32_t value) {
-  Addiu(rt, ZERO, value);
-}
-
-void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset,
-                             size_t size) {
-  MipsManagedRegister dst = m_dst.AsMips();
-  if (dst.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dst;
-  } else if (dst.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dst;
-    LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
-  } else if (dst.IsRegisterPair()) {
-    CHECK_EQ(8u, size) << dst;
-    LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
-    LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
-  } else if (dst.IsFRegister()) {
-    LoadSFromOffset(dst.AsFRegister(), src_register, src_offset);
+void MipsAssembler::LoadConst32(Register rd, int32_t value) {
+  if (IsUint<16>(value)) {
+    // Use OR with (unsigned) immediate to encode 16b unsigned int.
+    Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    // Use ADD with (signed) immediate to encode 16b signed int.
+    Addiu(rd, ZERO, value);
   } else {
-    CHECK(dst.IsDRegister()) << dst;
-    LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
+    Lui(rd, High16Bits(value));
+    if (value & 0xFFFF)
+      Ori(rd, rd, Low16Bits(value));
+  }
+}
+
+void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) {
+  LoadConst32(reg_lo, Low32Bits(value));
+  LoadConst32(reg_hi, High32Bits(value));
+}
+
+void MipsAssembler::StoreConst32ToOffset(int32_t value,
+                                         Register base,
+                                         int32_t offset,
+                                         Register temp) {
+  if (!IsInt<16>(offset)) {
+    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+  LoadConst32(temp, value);
+  Sw(temp, base, offset);
+}
+
+void MipsAssembler::StoreConst64ToOffset(int64_t value,
+                                         Register base,
+                                         int32_t offset,
+                                         Register temp) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) || !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))) {
+    CHECK_NE(temp, AT);  //  Must not use AT as temp, as not to overwrite the loaded value.
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+  LoadConst32(temp, Low32Bits(value));
+  Sw(temp, base, offset);
+  LoadConst32(temp, High32Bits(value));
+  Sw(temp, base, offset + kMipsWordSize);
+}
+
+void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) {
+  LoadConst32(temp, value);
+  Mtc1(temp, r);
+}
+
+void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
+  LoadConst32(temp, Low32Bits(value));
+  Mtc1(temp, rd);
+  LoadConst32(temp, High32Bits(value));
+  Mthc1(temp, rd);
+}
+
+void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
+  if (IsInt<16>(value)) {
+    Addiu(rt, rs, value);
+  } else {
+    LoadConst32(temp, value);
+    Addu(rt, rs, temp);
+  }
+}
+
+void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits offset_size,
+                                            MipsAssembler::Branch::Type short_type,
+                                            MipsAssembler::Branch::Type long_type) {
+  type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
+}
+
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  if (is_r6) {
+    // R6
+    if (is_call) {
+      InitShortOrLong(offset_size, kR6Call, kR6LongCall);
+    } else if (condition_ == kUncond) {
+      InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+    } else {
+      if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+        // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+        type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+      } else {
+        InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+      }
+    }
+  } else {
+    // R2
+    if (is_call) {
+      InitShortOrLong(offset_size, kCall, kLongCall);
+    } else if (condition_ == kUncond) {
+      InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+    } else {
+      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+    }
+  }
+  old_type_ = type_;
+}
+
+bool MipsAssembler::Branch::IsNop(BranchCondition condition, Register lhs, Register rhs) {
+  switch (condition) {
+    case kCondLT:
+    case kCondGT:
+    case kCondNE:
+    case kCondLTU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Register rhs) {
+  switch (condition) {
+    case kUncond:
+      return true;
+    case kCondGE:
+    case kCondLE:
+    case kCondEQ:
+    case kCondGEU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(0),
+      rhs_reg_(0),
+      condition_(kUncond) {
+  InitializeType(false, is_r6);
+}
+
+MipsAssembler::Branch::Branch(bool is_r6,
+                              uint32_t location,
+                              uint32_t target,
+                              MipsAssembler::BranchCondition condition,
+                              Register lhs_reg,
+                              Register rhs_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(lhs_reg),
+      rhs_reg_(rhs_reg),
+      condition_(condition) {
+  CHECK_NE(condition, kUncond);
+  switch (condition) {
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+      // We don't support synthetic R2 branches (preceded with slt[u]) at this level
+      // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >).
+      // We leave this up to the caller.
+      CHECK(is_r6);
+      FALLTHROUGH_INTENDED;
+    case kCondEQ:
+    case kCondNE:
+      // Require registers other than 0 not only for R6, but also for R2 to catch errors.
+      // To compare with 0, use dedicated kCond*Z conditions.
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_NE(rhs_reg, ZERO);
+      break;
+    case kCondLTZ:
+    case kCondGEZ:
+    case kCondLEZ:
+    case kCondGTZ:
+    case kCondEQZ:
+    case kCondNEZ:
+      // Require registers other than 0 not only for R6, but also for R2 to catch errors.
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kUncond:
+      UNREACHABLE();
+  }
+  CHECK(!IsNop(condition, lhs_reg, rhs_reg));
+  if (IsUncond(condition, lhs_reg, rhs_reg)) {
+    // Branch condition is always true, make the branch unconditional.
+    condition_ = kUncond;
+  }
+  InitializeType(false, is_r6);
+}
+
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(indirect_reg),
+      rhs_reg_(0),
+      condition_(kUncond) {
+  CHECK_NE(indirect_reg, ZERO);
+  CHECK_NE(indirect_reg, AT);
+  InitializeType(true, is_r6);
+}
+
+MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
+    MipsAssembler::BranchCondition cond) {
+  switch (cond) {
+    case kCondLT:
+      return kCondGE;
+    case kCondGE:
+      return kCondLT;
+    case kCondLE:
+      return kCondGT;
+    case kCondGT:
+      return kCondLE;
+    case kCondLTZ:
+      return kCondGEZ;
+    case kCondGEZ:
+      return kCondLTZ;
+    case kCondLEZ:
+      return kCondGTZ;
+    case kCondGTZ:
+      return kCondLEZ;
+    case kCondEQ:
+      return kCondNE;
+    case kCondNE:
+      return kCondEQ;
+    case kCondEQZ:
+      return kCondNEZ;
+    case kCondNEZ:
+      return kCondEQZ;
+    case kCondLTU:
+      return kCondGEU;
+    case kCondGEU:
+      return kCondLTU;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+  }
+  UNREACHABLE();
+}
+
+MipsAssembler::Branch::Type MipsAssembler::Branch::GetType() const {
+  return type_;
+}
+
+MipsAssembler::BranchCondition MipsAssembler::Branch::GetCondition() const {
+  return condition_;
+}
+
+Register MipsAssembler::Branch::GetLeftRegister() const {
+  return static_cast<Register>(lhs_reg_);
+}
+
+Register MipsAssembler::Branch::GetRightRegister() const {
+  return static_cast<Register>(rhs_reg_);
+}
+
+uint32_t MipsAssembler::Branch::GetTarget() const {
+  return target_;
+}
+
+uint32_t MipsAssembler::Branch::GetLocation() const {
+  return location_;
+}
+
+uint32_t MipsAssembler::Branch::GetOldLocation() const {
+  return old_location_;
+}
+
+uint32_t MipsAssembler::Branch::GetLength() const {
+  return branch_info_[type_].length;
+}
+
+uint32_t MipsAssembler::Branch::GetOldLength() const {
+  return branch_info_[old_type_].length;
+}
+
+uint32_t MipsAssembler::Branch::GetSize() const {
+  return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetOldSize() const {
+  return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetEndLocation() const {
+  return GetLocation() + GetSize();
+}
+
+uint32_t MipsAssembler::Branch::GetOldEndLocation() const {
+  return GetOldLocation() + GetOldSize();
+}
+
+bool MipsAssembler::Branch::IsLong() const {
+  switch (type_) {
+    // R2 short branches.
+    case kUncondBranch:
+    case kCondBranch:
+    case kCall:
+    // R6 short branches.
+    case kR6UncondBranch:
+    case kR6CondBranch:
+    case kR6Call:
+      return false;
+    // R2 long branches.
+    case kLongUncondBranch:
+    case kLongCondBranch:
+    case kLongCall:
+    // R6 long branches.
+    case kR6LongUncondBranch:
+    case kR6LongCondBranch:
+    case kR6LongCall:
+      return true;
+  }
+  UNREACHABLE();
+}
+
+bool MipsAssembler::Branch::IsResolved() const {
+  return target_ != kUnresolved;
+}
+
+MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSize() const {
+  OffsetBits offset_size =
+      (type_ == kR6CondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+          ? kOffset23
+          : branch_info_[type_].offset_size;
+  return offset_size;
+}
+
+MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSizeNeeded(uint32_t location,
+                                                                             uint32_t target) {
+  // For unresolved targets assume the shortest encoding
+  // (later it will be made longer if needed).
+  if (target == kUnresolved)
+    return kOffset16;
+  int64_t distance = static_cast<int64_t>(target) - location;
+  // To simplify calculations in composite branches consisting of multiple instructions
+  // bump up the distance by a value larger than the max byte size of a composite branch.
+  distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+  if (IsInt<kOffset16>(distance))
+    return kOffset16;
+  else if (IsInt<kOffset18>(distance))
+    return kOffset18;
+  else if (IsInt<kOffset21>(distance))
+    return kOffset21;
+  else if (IsInt<kOffset23>(distance))
+    return kOffset23;
+  else if (IsInt<kOffset28>(distance))
+    return kOffset28;
+  return kOffset32;
+}
+
+void MipsAssembler::Branch::Resolve(uint32_t target) {
+  target_ = target;
+}
+
+void MipsAssembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+  if (location_ > expand_location) {
+    location_ += delta;
+  }
+  if (!IsResolved()) {
+    return;  // Don't know the target yet.
+  }
+  if (target_ > expand_location) {
+    target_ += delta;
+  }
+}
+
+void MipsAssembler::Branch::PromoteToLong() {
+  switch (type_) {
+    // R2 short branches.
+    case kUncondBranch:
+      type_ = kLongUncondBranch;
+      break;
+    case kCondBranch:
+      type_ = kLongCondBranch;
+      break;
+    case kCall:
+      type_ = kLongCall;
+      break;
+    // R6 short branches.
+    case kR6UncondBranch:
+      type_ = kR6LongUncondBranch;
+      break;
+    case kR6CondBranch:
+      type_ = kR6LongCondBranch;
+      break;
+    case kR6Call:
+      type_ = kR6LongCall;
+      break;
+    default:
+      // Note: 'type_' is already long.
+      break;
+  }
+  CHECK(IsLong());
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+  // If the branch is still unresolved or already long, nothing to do.
+  if (IsLong() || !IsResolved()) {
+    return 0;
+  }
+  // Promote the short branch to long if the offset size is too small
+  // to hold the distance between location_ and target_.
+  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+    PromoteToLong();
+    uint32_t old_size = GetOldSize();
+    uint32_t new_size = GetSize();
+    CHECK_GT(new_size, old_size);
+    return new_size - old_size;
+  }
+  // The following logic is for debugging/testing purposes.
+  // Promote some short branches to long when it's not really required.
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+    int64_t distance = static_cast<int64_t>(target_) - location_;
+    distance = (distance >= 0) ? distance : -distance;
+    if (distance >= max_short_distance) {
+      PromoteToLong();
+      uint32_t old_size = GetOldSize();
+      uint32_t new_size = GetSize();
+      CHECK_GT(new_size, old_size);
+      return new_size - old_size;
+    }
+  }
+  return 0;
+}
+
+uint32_t MipsAssembler::Branch::GetOffsetLocation() const {
+  return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetOffset() const {
+  CHECK(IsResolved());
+  uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
+  // Calculate the byte distance between instructions and also account for
+  // different PC-relative origins.
+  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  // Prepare the offset for encoding into the instruction(s).
+  offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+  return offset;
+}
+
+MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+const MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) const {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+void MipsAssembler::Bind(MipsLabel* label) {
+  CHECK(!label->IsBound());
+  uint32_t bound_pc = buffer_.Size();
+
+  // Walk the list of branches referring to and preceding this label.
+  // Store the previously unknown target addresses in them.
+  while (label->IsLinked()) {
+    uint32_t branch_id = label->Position();
+    Branch* branch = GetBranch(branch_id);
+    branch->Resolve(bound_pc);
+
+    uint32_t branch_location = branch->GetLocation();
+    // Extract the location of the previous branch in the list (walking the list backwards;
+    // the previous branch ID was stored in the space reserved for this branch).
+    uint32_t prev = buffer_.Load<uint32_t>(branch_location);
+
+    // On to the previous branch in the list...
+    label->position_ = prev;
+  }
+
+  // Now make the label object contain its own location (relative to the end of the preceding
+  // branch, if any; it will be used by the branches referring to and following this label).
+  label->prev_branch_id_plus_one_ = branches_.size();
+  if (label->prev_branch_id_plus_one_) {
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    bound_pc -= branch->GetEndLocation();
+  }
+  label->BindTo(bound_pc);
+}
+
+uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+  CHECK(label->IsBound());
+  uint32_t target = label->Position();
+  if (label->prev_branch_id_plus_one_) {
+    // Get label location based on the branch preceding it.
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    target += branch->GetEndLocation();
+  }
+  return target;
+}
+
+uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) {
+  // We can reconstruct the adjustment by going through all the branches from the beginning
+  // up to the old_position. Since we expect AdjustedPosition() to be called in a loop
+  // with increasing old_position, we can use the data from last AdjustedPosition() to
+  // continue where we left off and the whole loop should be O(m+n) where m is the number
+  // of positions to adjust and n is the number of branches.
+  if (old_position < last_old_position_) {
+    last_position_adjustment_ = 0;
+    last_old_position_ = 0;
+    last_branch_id_ = 0;
+  }
+  while (last_branch_id_ != branches_.size()) {
+    const Branch* branch = GetBranch(last_branch_id_);
+    if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+      break;
+    }
+    last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();
+    ++last_branch_id_;
+  }
+  last_old_position_ = old_position;
+  return old_position + last_position_adjustment_;
+}
+
+void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
+  uint32_t length = branches_.back().GetLength();
+  if (!label->IsBound()) {
+    // Branch forward (to a following label), distance is unknown.
+    // The first branch forward will contain 0, serving as the terminator of
+    // the list of forward-reaching branches.
+    Emit(label->position_);
+    length--;
+    // Now make the label object point to this branch
+    // (this forms a linked list of branches preceding this label).
+    uint32_t branch_id = branches_.size() - 1;
+    label->LinkTo(branch_id);
+  }
+  // Reserve space for the branch.
+  while (length--) {
+    Nop();
+  }
+}
+
+void MipsAssembler::Buncond(MipsLabel* label) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs) {
+  // If lhs = rhs, this can be a NOP.
+  if (Branch::IsNop(condition, lhs, rhs)) {
+    return;
+  }
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::PromoteBranches() {
+  // Promote short branches to long as necessary.
+  bool changed;
+  do {
+    changed = false;
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+      uint32_t delta = branch.PromoteIfNeeded();
+      // If this branch has been promoted and needs to expand in size,
+      // relocate all branches by the expansion size.
+      if (delta) {
+        changed = true;
+        uint32_t expand_location = branch.GetLocation();
+        for (auto& branch2 : branches_) {
+          branch2.Relocate(expand_location, delta);
+        }
+      }
+    }
+  } while (changed);
+
+  // Account for branch expansion by resizing the code buffer
+  // and moving the code in it to its final location.
+  size_t branch_count = branches_.size();
+  if (branch_count > 0) {
+    // Resize.
+    Branch& last_branch = branches_[branch_count - 1];
+    uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+    uint32_t old_size = buffer_.Size();
+    buffer_.Resize(old_size + size_delta);
+    // Move the code residing between branch placeholders.
+    uint32_t end = old_size;
+    for (size_t i = branch_count; i > 0; ) {
+      Branch& branch = branches_[--i];
+      uint32_t size = end - branch.GetOldEndLocation();
+      buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+      end = branch.GetOldLocation();
+    }
+  }
+}
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = {
+  // R2 short branches.
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
+  {  5, 2, 0, MipsAssembler::Branch::kOffset16, 0 },  // kCall
+  // R2 long branches.
+  {  9, 3, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongUncondBranch
+  { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCondBranch
+  {  6, 1, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCall
+  // R6 short branches.
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
+                                                      // Exception: kOffset23 for beqzc/bnezc.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Call
+  // R6 long branches.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongUncondBranch
+  {  3, 1, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCondBranch
+  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+};
+
+// Note: make sure branch_info_[] and mitBranch() are kept synchronized.
+void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
+  CHECK_EQ(overwriting_, true);
+  overwrite_location_ = branch->GetLocation();
+  uint32_t offset = branch->GetOffset();
+  BranchCondition condition = branch->GetCondition();
+  Register lhs = branch->GetLeftRegister();
+  Register rhs = branch->GetRightRegister();
+  switch (branch->GetType()) {
+    // R2 short branches.
+    case Branch::kUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      B(offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+    case Branch::kCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcond(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+    case Branch::kCall:
+      Nal();
+      Nop();  // TODO: is this NOP really needed here?
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiu(lhs, RA, offset);
+      Jalr(lhs);
+      Nop();
+      break;
+
+    // R2 long branches.
+    case Branch::kLongUncondBranch:
+      // To get the value of the PC register we need to use the NAL instruction.
+      // NAL clobbers the RA register. However, RA must be preserved if the
+      // method is compiled without the entry/exit sequences that would take care
+      // of preserving RA (typically, leaf methods don't preserve RA explicitly).
+      // So, we need to preserve RA in some temporary storage ourselves. The AT
+      // register can't be used for this because we need it to load a constant
+      // which will be added to the value that NAL stores in RA. And we can't
+      // use T9 for this in the context of the JNI compiler, which uses it
+      // as a scratch register (see InterproceduralScratchRegister()).
+      // If we were to add a 32-bit constant to RA using two ADDIU instructions,
+      // we'd also need to use the ROTR instruction, which requires no less than
+      // MIPSR2.
+      // Perhaps, we could use T8 or one of R2's multiplier/divider registers
+      // (LO or HI) or even a floating-point register, but that doesn't seem
+      // like a nice solution. We may want this to work on both R6 and pre-R6.
+      // For now simply use the stack for RA. This should be OK since for the
+      // vast majority of code a short PC-relative branch is sufficient.
+      // TODO: can this be improved?
+      Push(RA);
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(AT, AT, RA);
+      Lw(RA, SP, 0);
+      Jr(AT);
+      DecreaseFrameSize(kMipsWordSize);
+      break;
+    case Branch::kLongCondBranch:
+      // The comment on case 'Branch::kLongUncondBranch' applies here as well.
+      // Note: the opposite condition branch encodes 8 as the distance, which is equal to the
+      // number of instructions skipped:
+      // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
+      EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8);
+      Push(RA);
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(AT, AT, RA);
+      Lw(RA, SP, 0);
+      Jr(AT);
+      DecreaseFrameSize(kMipsWordSize);
+      break;
+    case Branch::kLongCall:
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(lhs, AT, RA);
+      Jalr(lhs);
+      Nop();
+      break;
+
+    // R6 short branches.
+    case Branch::kR6UncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kR6CondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondc(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden slot.
+      break;
+    case Branch::kR6Call:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiupc(lhs, offset);
+      Jialc(lhs, 0);
+      break;
+
+    // R6 long branches.
+    case Branch::kR6LongUncondBranch:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kR6LongCondBranch:
+      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kR6LongCall:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(lhs, High16Bits(offset));
+      Addiu(lhs, lhs, Low16Bits(offset));
+      Jialc(lhs, 0);
+      break;
+  }
+  CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+  CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
+}
+
+void MipsAssembler::B(MipsLabel* label) {
+  Buncond(label);
+}
+
+void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
+  Call(label, indirect_reg);
+}
+
+void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
+  Bcond(label, kCondEQ, rs, rt);
+}
+
+void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label) {
+  Bcond(label, kCondNE, rs, rt);
+}
+
+void MipsAssembler::Beqz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondEQZ, rt);
+}
+
+void MipsAssembler::Bnez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondNEZ, rt);
+}
+
+void MipsAssembler::Bltz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondLTZ, rt);
+}
+
+void MipsAssembler::Bgez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondGEZ, rt);
+}
+
+void MipsAssembler::Blez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondLEZ, rt);
+}
+
+void MipsAssembler::Bgtz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondGTZ, rt);
+}
+
+void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondLT, rs, rt);
+  } else if (!Branch::IsNop(kCondLT, rs, rt)) {
+    // Synthesize the instruction (not available on R2).
+    Slt(AT, rs, rt);
+    Bnez(AT, label);
+  }
+}
+
+void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondGE, rs, rt);
+  } else if (Branch::IsUncond(kCondGE, rs, rt)) {
+    B(label);
+  } else {
+    // Synthesize the instruction (not available on R2).
+    Slt(AT, rs, rt);
+    Beqz(AT, label);
+  }
+}
+
+void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondLTU, rs, rt);
+  } else if (!Branch::IsNop(kCondLTU, rs, rt)) {
+    // Synthesize the instruction (not available on R2).
+    Sltu(AT, rs, rt);
+    Bnez(AT, label);
+  }
+}
+
+void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondGEU, rs, rt);
+  } else if (Branch::IsUncond(kCondGEU, rs, rt)) {
+    B(label);
+  } else {
+    // Synthesize the instruction (not available on R2).
+    Sltu(AT, rs, rt);
+    Beqz(AT, label);
   }
 }
 
 void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
                                    int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kLoadSignedByte:
       Lb(reg, base, offset);
@@ -488,8 +1617,16 @@
     case kLoadWord:
       Lw(reg, base, offset);
       break;
-    case kLoadWordPair:
-      LOG(FATAL) << "UNREACHABLE";
+    case kLoadDoubleword:
+      if (reg == base) {
+        // This will clobber the base when loading the lower register. Since we have to load the
+        // higher register as well, this will fail. Solution: reverse the order.
+        Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
+        Lw(reg, base, offset);
+      } else {
+        Lw(reg, base, offset);
+        Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -497,15 +1634,74 @@
 }
 
 void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   Lwc1(reg, base, offset);
 }
 
-void MipsAssembler::LoadDFromOffset(DRegister reg, Register base, int32_t offset) {
-  Ldc1(reg, base, offset);
+void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (!IsAligned<kMipsDoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
+  if (offset & 0x7) {
+    if (Is32BitFPU()) {
+      Lwc1(reg, base, offset);
+      Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize);
+    } else {
+      // 64-bit FPU.
+      Lwc1(reg, base, offset);
+      Lw(T8, base, offset + kMipsWordSize);
+      Mthc1(T8, reg);
+    }
+  } else {
+    Ldc1(reg, base, offset);
+  }
+}
+
+void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset,
+                             size_t size) {
+  MipsManagedRegister dst = m_dst.AsMips();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsCoreRegister()) {
+    CHECK_EQ(kMipsWordSize, size) << dst;
+    LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
+  } else if (dst.IsRegisterPair()) {
+    CHECK_EQ(kMipsDoublewordSize, size) << dst;
+    LoadFromOffset(kLoadDoubleword, dst.AsRegisterPairLow(), src_register, src_offset);
+  } else if (dst.IsFRegister()) {
+    if (size == kMipsWordSize) {
+      LoadSFromOffset(dst.AsFRegister(), src_register, src_offset);
+    } else {
+      CHECK_EQ(kMipsDoublewordSize, size) << dst;
+      LoadDFromOffset(dst.AsFRegister(), src_register, src_offset);
+    }
+  }
 }
 
 void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base,
                                   int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kStoreByte:
       Sb(reg, base, offset);
@@ -516,8 +1712,11 @@
     case kStoreWord:
       Sw(reg, base, offset);
       break;
-    case kStoreWordPair:
-      LOG(FATAL) << "UNREACHABLE";
+    case kStoreDoubleword:
+      CHECK_NE(reg, base);
+      CHECK_NE(static_cast<Register>(reg + 1), base);
+      Sw(reg, base, offset);
+      Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -525,11 +1724,40 @@
 }
 
 void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   Swc1(reg, base, offset);
 }
 
-void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset) {
-  Sdc1(reg, base, offset);
+void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (!IsAligned<kMipsDoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
+  if (offset & 0x7) {
+    if (Is32BitFPU()) {
+      Swc1(reg, base, offset);
+      Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize);
+    } else {
+      // 64-bit FPU.
+      Mfhc1(T8, reg);
+      Swc1(reg, base, offset);
+      Sw(T8, base, offset + kMipsWordSize);
+    }
+  } else {
+    Sdc1(reg, base, offset);
+  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -546,7 +1774,7 @@
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
 
-  // Push callee saves and return address
+  // Push callee saves and return address.
   int stack_offset = frame_size - kFramePointerSize;
   StoreToOffset(kStoreWord, RA, SP, stack_offset);
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
@@ -569,13 +1797,13 @@
       offset += spill.getSize();
     } else if (reg.IsCoreRegister()) {
       StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
+      offset += kMipsWordSize;
     } else if (reg.IsFRegister()) {
       StoreSToOffset(reg.AsFRegister(), SP, offset);
-      offset += 4;
+      offset += kMipsWordSize;
     } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
+      StoreDToOffset(reg.AsOverlappingDRegisterLow(), SP, offset);
+      offset += kMipsDoublewordSize;
     }
   }
 }
@@ -585,7 +1813,7 @@
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
 
-  // Pop callee saves and return address
+  // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
@@ -601,6 +1829,7 @@
 
   // Then jump to the return address.
   Jr(RA);
+  Nop();
 
   // The CFI should be restored for any code that follows the exit block.
   cfi_.RestoreState();
@@ -608,14 +1837,14 @@
 }
 
 void MipsAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, SP, -adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Addiu32(SP, SP, -adjust);
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, SP, adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Addiu32(SP, SP, adjust);
   cfi_.AdjustCFAOffset(-adjust);
 }
 
@@ -624,18 +1853,20 @@
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (src.IsCoreRegister()) {
-    CHECK_EQ(4u, size);
+    CHECK_EQ(kMipsWordSize, size);
     StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
   } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
+    CHECK_EQ(kMipsDoublewordSize, size);
     StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
     StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
-                  SP, dest.Int32Value() + 4);
+                  SP, dest.Int32Value() + kMipsWordSize);
   } else if (src.IsFRegister()) {
-    StoreSToOffset(src.AsFRegister(), SP, dest.Int32Value());
-  } else {
-    CHECK(src.IsDRegister());
-    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
+    if (size == kMipsWordSize) {
+      StoreSToOffset(src.AsFRegister(), SP, dest.Int32Value());
+    } else {
+      CHECK_EQ(kMipsDoublewordSize, size);
+      StoreDToOffset(src.AsFRegister(), SP, dest.Int32Value());
+    }
   }
 }
 
@@ -655,29 +1886,30 @@
                                           ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
+  LoadConst32(scratch.AsCoreRegister(), imm);
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                           ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                             FrameOffset fr_offs,
+void MipsAssembler::StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, uint32_t imm,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+  // Is this function even referenced anywhere else in the code?
+  LoadConst32(scratch.AsCoreRegister(), imm);
+  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
+}
+
+void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                               FrameOffset fr_offs,
+                                               ManagedRegister mscratch) {
+  MipsManagedRegister scratch = mscratch.AsMips();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
+void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -687,14 +1919,15 @@
   MipsManagedRegister scratch = mscratch.AsMips();
   StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + kMipsWordSize);
 }
 
 void MipsAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) {
+void MipsAssembler::LoadFromThread32(ManagedRegister mdest,
+                                     ThreadOffset<kMipsWordSize> src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -707,7 +1940,7 @@
 void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
                             bool unpoison_reference) {
   MipsManagedRegister dest = mdest.AsMips();
-  CHECK(dest.IsCoreRegister() && dest.IsCoreRegister());
+  CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
   if (kPoisonHeapReferences && unpoison_reference) {
@@ -715,16 +1948,15 @@
   }
 }
 
-void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                               Offset offs) {
+void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) {
   MipsManagedRegister dest = mdest.AsMips();
-  CHECK(dest.IsCoreRegister() && dest.IsCoreRegister()) << dest;
+  CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
 void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                         ThreadOffset<4> offs) {
+                                           ThreadOffset<kMipsWordSize> offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -738,7 +1970,7 @@
   UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
 }
 
-void MipsAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t /*size*/) {
+void MipsAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
   MipsManagedRegister dest = mdest.AsMips();
   MipsManagedRegister src = msrc.AsMips();
   if (!dest.Equals(src)) {
@@ -747,14 +1979,19 @@
       Move(dest.AsCoreRegister(), src.AsCoreRegister());
     } else if (dest.IsFRegister()) {
       CHECK(src.IsFRegister()) << src;
-      MovS(dest.AsFRegister(), src.AsFRegister());
+      if (size == kMipsWordSize) {
+        MovS(dest.AsFRegister(), src.AsFRegister());
+      } else {
+        CHECK_EQ(kMipsDoublewordSize, size);
+        MovD(dest.AsFRegister(), src.AsFRegister());
+      }
     } else if (dest.IsDRegister()) {
       CHECK(src.IsDRegister()) << src;
-      MovD(dest.AsDRegister(), src.AsDRegister());
+      MovD(dest.AsOverlappingDRegisterLow(), src.AsOverlappingDRegisterLow());
     } else {
       CHECK(dest.IsRegisterPair()) << dest;
       CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second
+      // Ensure that the first move doesn't clobber the input of the second.
       if (src.AsRegisterPairHigh() != dest.AsRegisterPairLow()) {
         Move(dest.AsRegisterPairLow(), src.AsRegisterPairLow());
         Move(dest.AsRegisterPairHigh(), src.AsRegisterPairHigh());
@@ -766,8 +2003,7 @@
   }
 }
 
-void MipsAssembler::CopyRef(FrameOffset dest, FrameOffset src,
-                            ManagedRegister mscratch) {
+void MipsAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
@@ -775,8 +2011,8 @@
 }
 
 void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                         ThreadOffset<4> thr_offs,
-                                         ManagedRegister mscratch) {
+                                           ThreadOffset<kMipsWordSize> thr_offs,
+                                           ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -785,9 +2021,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                       FrameOffset fr_offs,
-                                       ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -796,26 +2032,25 @@
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset dest, FrameOffset src,
-                         ManagedRegister mscratch, size_t size) {
+void MipsAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
+  CHECK(size == kMipsWordSize || size == kMipsDoublewordSize) << size;
+  if (size == kMipsWordSize) {
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
     StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
+  } else if (size == kMipsDoublewordSize) {
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
     StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + kMipsWordSize);
+    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + kMipsWordSize);
   }
 }
 
 void MipsAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
                          ManagedRegister mscratch, size_t size) {
   Register scratch = mscratch.AsMips().AsCoreRegister();
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   LoadFromOffset(kLoadWord, scratch, src_base.AsMips().AsCoreRegister(), src_offset.Int32Value());
   StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
 }
@@ -823,107 +2058,117 @@
 void MipsAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
                          ManagedRegister mscratch, size_t size) {
   Register scratch = mscratch.AsMips().AsCoreRegister();
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
   StoreToOffset(kStoreWord, scratch, dest_base.AsMips().AsCoreRegister(), dest_offset.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+void MipsAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                         FrameOffset src_base ATTRIBUTE_UNUSED,
+                         Offset src_offset ATTRIBUTE_UNUSED,
+                         ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                         size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::Copy(ManagedRegister dest, Offset dest_offset,
                          ManagedRegister src, Offset src_offset,
                          ManagedRegister mscratch, size_t size) {
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   Register scratch = mscratch.AsMips().AsCoreRegister();
   LoadFromOffset(kLoadWord, scratch, src.AsMips().AsCoreRegister(), src_offset.Int32Value());
   StoreToOffset(kStoreWord, scratch, dest.AsMips().AsCoreRegister(), dest_offset.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+void MipsAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                         Offset dest_offset ATTRIBUTE_UNUSED,
+                         FrameOffset src ATTRIBUTE_UNUSED,
+                         Offset src_offset ATTRIBUTE_UNUSED,
+                         ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                         size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::MemoryBarrier(ManagedRegister) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+  // TODO: sync?
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister min_reg, bool null_allowed) {
+                                           FrameOffset handle_scope_offset,
+                                           ManagedRegister min_reg,
+                                           bool null_allowed) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
   CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   if (null_allowed) {
-    Label null_arg;
+    MipsLabel null_arg;
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    // E.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset).
     if (in_reg.IsNoRegister()) {
       LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                      SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
     if (!out_reg.Equals(in_reg)) {
-      LoadImmediate(out_reg.AsCoreRegister(), 0);
+      LoadConst32(out_reg.AsCoreRegister(), 0);
     }
-    EmitBranch(in_reg.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    Beqz(in_reg.AsCoreRegister(), &null_arg);
+    Addiu32(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Addiu32(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
 }
 
 void MipsAssembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister mscratch,
-                                    bool null_allowed) {
+                                           FrameOffset handle_scope_offset,
+                                           ManagedRegister mscratch,
+                                           bool null_allowed) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   if (null_allowed) {
-    Label null_arg;
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   handle_scope_offset.Int32Value());
+    MipsLabel null_arg;
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    EmitBranch(scratch.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    // E.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset).
+    Beqz(scratch.AsCoreRegister(), &null_arg);
+    Addiu32(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Addiu32(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
 }
 
 // Given a handle scope entry, load the associated reference.
 void MipsAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                          ManagedRegister min_reg) {
+                                                 ManagedRegister min_reg) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   CHECK(in_reg.IsCoreRegister()) << in_reg;
-  Label null_arg;
+  MipsLabel null_arg;
   if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0);
+    LoadConst32(out_reg.AsCoreRegister(), 0);
   }
-  EmitBranch(in_reg.AsCoreRegister(), ZERO, &null_arg, true);
+  Beqz(in_reg.AsCoreRegister(), &null_arg);
   LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                  in_reg.AsCoreRegister(), 0);
-  Bind(&null_arg, false);
+  Bind(&null_arg);
 }
 
-void MipsAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+void MipsAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+                                 bool could_be_null ATTRIBUTE_UNUSED) {
+  // TODO: not validating references.
 }
 
-void MipsAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+void MipsAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+                                 bool could_be_null ATTRIBUTE_UNUSED) {
+  // TODO: not validating references.
 }
 
 void MipsAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
@@ -934,22 +2179,24 @@
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  base.AsCoreRegister(), offset.Int32Value());
   Jalr(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  Nop();
+  // TODO: place reference map on call.
 }
 
 void MipsAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, base.Int32Value());
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  scratch.AsCoreRegister(), offset.Int32Value());
   Jalr(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  Nop();
+  // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*mscratch*/) {
+void MipsAssembler::CallFromThread32(ThreadOffset<kMipsWordSize> offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
@@ -958,35 +2205,38 @@
 }
 
 void MipsAssembler::GetCurrentThread(FrameOffset offset,
-                                     ManagedRegister /*mscratch*/) {
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   StoreToOffset(kStoreWord, S1, SP, offset.Int32Value());
 }
 
 void MipsAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   MipsManagedRegister scratch = mscratch.AsMips();
-  MipsExceptionSlowPath* slow = new MipsExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
+  exception_blocks_.emplace_back(scratch, stack_adjust);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 S1, Thread::ExceptionOffset<4>().Int32Value());
-  EmitBranch(scratch.AsCoreRegister(), ZERO, slow->Entry(), false);
+                 S1, Thread::ExceptionOffset<kMipsWordSize>().Int32Value());
+  // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry());
+  // as the NAL instruction (occurring in long R2 branches) may become deprecated.
+  // For now use common for R2 and R6 instructions as this code must execute on both.
+  Bnez(scratch.AsCoreRegister(), exception_blocks_.back().Entry());
 }
 
-void MipsExceptionSlowPath::Emit(Assembler* sasm) {
-  MipsAssembler* sp_asm = down_cast<MipsAssembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_, false);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
+void MipsAssembler::EmitExceptionPoll(MipsExceptionSlowPath* exception) {
+  Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving A0 as this call won't return
-  __ Move(A0, scratch_.AsCoreRegister());
-  // Set up call to Thread::Current()->pDeliverException
-  __ LoadFromOffset(kLoadWord, T9, S1, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
-  __ Jr(T9);
-  // Call never returns
-  __ Break();
-#undef __
+  // Pass exception object as argument.
+  // Don't care about preserving A0 as this call won't return.
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+  Move(A0, exception->scratch_.AsCoreRegister());
+  // Set up call to Thread::Current()->pDeliverException.
+  LoadFromOffset(kLoadWord, T9, S1,
+    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pDeliverException).Int32Value());
+  Jr(T9);
+  Nop();
+
+  // Call never returns.
+  Break();
 }
 
 }  // namespace mips
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 378a59c..aa187b8 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,54 +17,111 @@
 #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 
+#include <utility>
 #include <vector>
 
+#include "arch/mips/instruction_set_features_mips.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
-#include "utils/assembler.h"
 #include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
 
 namespace art {
 namespace mips {
 
+static constexpr size_t kMipsWordSize = 4;
+static constexpr size_t kMipsDoublewordSize = 8;
+
 enum LoadOperandType {
   kLoadSignedByte,
   kLoadUnsignedByte,
   kLoadSignedHalfword,
   kLoadUnsignedHalfword,
   kLoadWord,
-  kLoadWordPair,
-  kLoadSWord,
-  kLoadDWord
+  kLoadDoubleword
 };
 
 enum StoreOperandType {
   kStoreByte,
   kStoreHalfword,
   kStoreWord,
-  kStoreWordPair,
-  kStoreSWord,
-  kStoreDWord
+  kStoreDoubleword
+};
+
+class MipsLabel : public Label {
+ public:
+  MipsLabel() : prev_branch_id_plus_one_(0) {}
+
+  MipsLabel(MipsLabel&& src)
+      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
+
+  friend class MipsAssembler;
+  DISALLOW_COPY_AND_ASSIGN(MipsLabel);
+};
+
+// Slowpath entered when Thread::Current()->_exception is non-null.
+class MipsExceptionSlowPath {
+ public:
+  explicit MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+  MipsExceptionSlowPath(MipsExceptionSlowPath&& src)
+      : scratch_(std::move(src.scratch_)),
+        stack_adjust_(std::move(src.stack_adjust_)),
+        exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+  MipsLabel* Entry() { return &exception_entry_; }
+  const MipsManagedRegister scratch_;
+  const size_t stack_adjust_;
+  MipsLabel exception_entry_;
+
+  friend class MipsAssembler;
+  DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
 class MipsAssembler FINAL : public Assembler {
  public:
-  MipsAssembler() {}
-  virtual ~MipsAssembler() {}
+  explicit MipsAssembler(const MipsInstructionSetFeatures* instruction_set_features = nullptr)
+      : overwriting_(false),
+        overwrite_location_(0),
+        last_position_adjustment_(0),
+        last_old_position_(0),
+        last_branch_id_(0),
+        isa_features_(instruction_set_features) {}
+
+  virtual ~MipsAssembler() {
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+    }
+  }
 
   // Emit Machine Instructions.
-  void Add(Register rd, Register rs, Register rt);
   void Addu(Register rd, Register rs, Register rt);
-  void Addi(Register rt, Register rs, uint16_t imm16);
   void Addiu(Register rt, Register rs, uint16_t imm16);
-  void Sub(Register rd, Register rs, Register rt);
   void Subu(Register rd, Register rs, Register rt);
-  void Mult(Register rs, Register rt);
-  void Multu(Register rs, Register rt);
-  void Div(Register rs, Register rt);
-  void Divu(Register rs, Register rt);
+
+  void MultR2(Register rs, Register rt);  // R2
+  void MultuR2(Register rs, Register rt);  // R2
+  void DivR2(Register rs, Register rt);  // R2
+  void DivuR2(Register rs, Register rt);  // R2
+  void MulR2(Register rd, Register rs, Register rt);  // R2
+  void DivR2(Register rd, Register rs, Register rt);  // R2
+  void ModR2(Register rd, Register rs, Register rt);  // R2
+  void DivuR2(Register rd, Register rs, Register rt);  // R2
+  void ModuR2(Register rd, Register rs, Register rt);  // R2
+  void MulR6(Register rd, Register rs, Register rt);  // R6
+  void MuhuR6(Register rd, Register rs, Register rt);  // R6
+  void DivR6(Register rd, Register rs, Register rt);  // R6
+  void ModR6(Register rd, Register rs, Register rt);  // R6
+  void DivuR6(Register rd, Register rs, Register rt);  // R6
+  void ModuR6(Register rd, Register rs, Register rt);  // R6
 
   void And(Register rd, Register rs, Register rt);
   void Andi(Register rt, Register rs, uint16_t imm16);
@@ -74,12 +131,15 @@
   void Xori(Register rt, Register rs, uint16_t imm16);
   void Nor(Register rd, Register rs, Register rt);
 
-  void Sll(Register rd, Register rs, int shamt);
-  void Srl(Register rd, Register rs, int shamt);
-  void Sra(Register rd, Register rs, int shamt);
-  void Sllv(Register rd, Register rs, Register rt);
-  void Srlv(Register rd, Register rs, Register rt);
-  void Srav(Register rd, Register rs, Register rt);
+  void Seb(Register rd, Register rt);  // R2+
+  void Seh(Register rd, Register rt);  // R2+
+
+  void Sll(Register rd, Register rt, int shamt);
+  void Srl(Register rd, Register rt, int shamt);
+  void Sra(Register rd, Register rt, int shamt);
+  void Sllv(Register rd, Register rt, Register rs);
+  void Srlv(Register rd, Register rt, Register rs);
+  void Srav(Register rd, Register rt, Register rs);
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
@@ -87,8 +147,9 @@
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
   void Lui(Register rt, uint16_t imm16);
-  void Mfhi(Register rd);
-  void Mflo(Register rd);
+  void Sync(uint32_t stype);
+  void Mfhi(Register rd);  // R2
+  void Mflo(Register rd);  // R2
 
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
@@ -99,81 +160,138 @@
   void Slti(Register rt, Register rs, uint16_t imm16);
   void Sltiu(Register rt, Register rs, uint16_t imm16);
 
-  void Beq(Register rt, Register rs, uint16_t imm16);
-  void Bne(Register rt, Register rs, uint16_t imm16);
-  void J(uint32_t address);
-  void Jal(uint32_t address);
-  void Jr(Register rs);
+  void B(uint16_t imm16);
+  void Beq(Register rs, Register rt, uint16_t imm16);
+  void Bne(Register rs, Register rt, uint16_t imm16);
+  void Beqz(Register rt, uint16_t imm16);
+  void Bnez(Register rt, uint16_t imm16);
+  void Bltz(Register rt, uint16_t imm16);
+  void Bgez(Register rt, uint16_t imm16);
+  void Blez(Register rt, uint16_t imm16);
+  void Bgtz(Register rt, uint16_t imm16);
+  void J(uint32_t addr26);
+  void Jal(uint32_t addr26);
+  void Jalr(Register rd, Register rs);
   void Jalr(Register rs);
+  void Jr(Register rs);
+  void Nal();
+  void Auipc(Register rs, uint16_t imm16);  // R6
+  void Addiupc(Register rs, uint32_t imm19);  // R6
+  void Bc(uint32_t imm26);  // R6
+  void Jic(Register rt, uint16_t imm16);  // R6
+  void Jialc(Register rt, uint16_t imm16);  // R6
+  void Bltc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bltzc(Register rt, uint16_t imm16);  // R6
+  void Bgtzc(Register rt, uint16_t imm16);  // R6
+  void Bgec(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bgezc(Register rt, uint16_t imm16);  // R6
+  void Blezc(Register rt, uint16_t imm16);  // R6
+  void Bltuc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bgeuc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Beqc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bnec(Register rs, Register rt, uint16_t imm16);  // R6
+  void Beqzc(Register rs, uint32_t imm21);  // R6
+  void Bnezc(Register rs, uint32_t imm21);  // R6
 
   void AddS(FRegister fd, FRegister fs, FRegister ft);
   void SubS(FRegister fd, FRegister fs, FRegister ft);
   void MulS(FRegister fd, FRegister fs, FRegister ft);
   void DivS(FRegister fd, FRegister fs, FRegister ft);
-  void AddD(DRegister fd, DRegister fs, DRegister ft);
-  void SubD(DRegister fd, DRegister fs, DRegister ft);
-  void MulD(DRegister fd, DRegister fs, DRegister ft);
-  void DivD(DRegister fd, DRegister fs, DRegister ft);
+  void AddD(FRegister fd, FRegister fs, FRegister ft);
+  void SubD(FRegister fd, FRegister fs, FRegister ft);
+  void MulD(FRegister fd, FRegister fs, FRegister ft);
+  void DivD(FRegister fd, FRegister fs, FRegister ft);
   void MovS(FRegister fd, FRegister fs);
-  void MovD(DRegister fd, DRegister fs);
+  void MovD(FRegister fd, FRegister fs);
+  void NegS(FRegister fd, FRegister fs);
+  void NegD(FRegister fd, FRegister fs);
+
+  void Cvtsw(FRegister fd, FRegister fs);
+  void Cvtdw(FRegister fd, FRegister fs);
+  void Cvtsd(FRegister fd, FRegister fs);
+  void Cvtds(FRegister fd, FRegister fs);
 
   void Mfc1(Register rt, FRegister fs);
-  void Mtc1(FRegister ft, Register rs);
+  void Mtc1(Register rt, FRegister fs);
+  void Mfhc1(Register rt, FRegister fs);
+  void Mthc1(Register rt, FRegister fs);
   void Lwc1(FRegister ft, Register rs, uint16_t imm16);
-  void Ldc1(DRegister ft, Register rs, uint16_t imm16);
+  void Ldc1(FRegister ft, Register rs, uint16_t imm16);
   void Swc1(FRegister ft, Register rs, uint16_t imm16);
-  void Sdc1(DRegister ft, Register rs, uint16_t imm16);
+  void Sdc1(FRegister ft, Register rs, uint16_t imm16);
 
   void Break();
   void Nop();
-  void Move(Register rt, Register rs);
-  void Clear(Register rt);
-  void Not(Register rt, Register rs);
-  void Mul(Register rd, Register rs, Register rt);
-  void Div(Register rd, Register rs, Register rt);
-  void Rem(Register rd, Register rs, Register rt);
+  void Move(Register rd, Register rs);
+  void Clear(Register rd);
+  void Not(Register rd, Register rs);
 
-  void AddConstant(Register rt, Register rs, int32_t value);
-  void LoadImmediate(Register rt, int32_t value);
+  // Higher level composite instructions.
+  void LoadConst32(Register rd, int32_t value);
+  void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
+  void LoadDConst64(FRegister rd, int64_t value, Register temp);
+  void LoadSConst32(FRegister r, int32_t value, Register temp);
+  void StoreConst32ToOffset(int32_t value, Register base, int32_t offset, Register temp);
+  void StoreConst64ToOffset(int64_t value, Register base, int32_t offset, Register temp);
+  void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT);
+
+  // These will generate R2 branches or R6 branches as appropriate.
+  void Bind(MipsLabel* label);
+  void B(MipsLabel* label);
+  void Jalr(MipsLabel* label, Register indirect_reg);
+  void Beq(Register rs, Register rt, MipsLabel* label);
+  void Bne(Register rs, Register rt, MipsLabel* label);
+  void Beqz(Register rt, MipsLabel* label);
+  void Bnez(Register rt, MipsLabel* label);
+  void Bltz(Register rt, MipsLabel* label);
+  void Bgez(Register rt, MipsLabel* label);
+  void Blez(Register rt, MipsLabel* label);
+  void Bgtz(Register rt, MipsLabel* label);
+  void Blt(Register rs, Register rt, MipsLabel* label);
+  void Bge(Register rs, Register rt, MipsLabel* label);
+  void Bltu(Register rs, Register rt, MipsLabel* label);
+  void Bgeu(Register rs, Register rt, MipsLabel* label);
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
   void LoadSFromOffset(FRegister reg, Register base, int32_t offset);
-  void LoadDFromOffset(DRegister reg, Register base, int32_t offset);
+  void LoadDFromOffset(FRegister reg, Register base, int32_t offset);
   void StoreToOffset(StoreOperandType type, Register reg, Register base, int32_t offset);
   void StoreSToOffset(FRegister reg, Register base, int32_t offset);
-  void StoreDToOffset(DRegister reg, Register base, int32_t offset);
+  void StoreDToOffset(FRegister reg, Register base, int32_t offset);
 
   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
-  void Emit(int32_t value);
-  void EmitBranch(Register rt, Register rs, Label* label, bool equal);
-  void EmitJump(Label* label, bool link);
-  void Bind(Label* label, bool is_jump);
+  void Emit(uint32_t value);
+
+  // Push/pop composite routines.
+  void Push(Register rs);
+  void Pop(Register rd);
+  void PopAndReturn(Register rd, Register rt);
 
   void Bind(Label* label) OVERRIDE {
-    Bind(label, false);
+    Bind(down_cast<MipsLabel*>(label));
   }
-  void Jump(Label* label) OVERRIDE {
-    EmitJump(label, false);
+  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
   //
-  // Overridden common assembler high-level functionality
+  // Overridden common assembler high-level functionality.
   //
 
-  // Emit code that will create an activation on the stack
+  // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
-  // Emit code that will remove an activation from the stack
+  // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
 
-  // Store routines
+  // Store routines.
   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
@@ -191,7 +309,7 @@
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
-  // Load routines
+  // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
   void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE;
@@ -205,7 +323,7 @@
 
   void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE;
 
-  // Copying routines
+  // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
   void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
@@ -235,13 +353,13 @@
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
-  // Sign extension
+  // Sign extension.
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Zero extension
+  // Zero extension.
   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Exploit fast access in managed code to Thread::Current()
+  // Exploit fast access in managed code to Thread::Current().
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
@@ -257,7 +375,7 @@
   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
                               ManagedRegister mscratch, bool null_allowed) OVERRIDE;
 
-  // src holds a handle scope entry (Object**) load this into dst
+  // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -265,7 +383,7 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
 
-  // Call to address held at [base+offset]
+  // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE;
@@ -274,43 +392,253 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
 
+  // Emit slow paths queued during assembly and promote short branches to long if needed.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit branches and finalize all instructions.
+  void FinalizeInstructions(const MemoryRegion& region);
+
+  // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS,
+  // must be used instead of MipsLabel::GetPosition()).
+  uint32_t GetLabelLocation(MipsLabel* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
+
+  enum BranchCondition {
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTZ,
+    kCondGEZ,
+    kCondLEZ,
+    kCondGTZ,
+    kCondEQ,
+    kCondNE,
+    kCondEQZ,
+    kCondNEZ,
+    kCondLTU,
+    kCondGEU,
+    kUncond,
+  };
+  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
  private:
+  class Branch {
+   public:
+    enum Type {
+      // R2 short branches.
+      kUncondBranch,
+      kCondBranch,
+      kCall,
+      // R2 long branches.
+      kLongUncondBranch,
+      kLongCondBranch,
+      kLongCall,
+      // R6 short branches.
+      kR6UncondBranch,
+      kR6CondBranch,
+      kR6Call,
+      // R6 long branches.
+      kR6LongUncondBranch,
+      kR6LongCondBranch,
+      kR6LongCall,
+    };
+    // Bit sizes of offsets defined as enums to minimize chance of typos.
+    enum OffsetBits {
+      kOffset16 = 16,
+      kOffset18 = 18,
+      kOffset21 = 21,
+      kOffset23 = 23,
+      kOffset28 = 28,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
+    static constexpr int32_t kMaxBranchLength = 32;
+    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+    struct BranchInfo {
+      // Branch length as a number of 4-byte-long instructions.
+      uint32_t length;
+      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+      // PC-relative offset (or its most significant 16-bit half, which goes first).
+      uint32_t instr_offset;
+      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+      // instructions) from the instruction containing the offset.
+      uint32_t pc_org;
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kR6CondBranch
+      // is an exception: use kOffset23 for beqzc/bnezc).
+      OffsetBits offset_size;
+      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+      // count.
+      int offset_shift;
+    };
+    static const BranchInfo branch_info_[/* Type */];
+
+    // Unconditional branch.
+    Branch(bool is_r6, uint32_t location, uint32_t target);
+    // Conditional branch.
+    Branch(bool is_r6,
+           uint32_t location,
+           uint32_t target,
+           BranchCondition condition,
+           Register lhs_reg,
+           Register rhs_reg = ZERO);
+    // Call (branch and link) that stores the target address in a given register (i.e. T9).
+    Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+
+    // Some conditional branches with lhs = rhs are effectively NOPs, while some
+    // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
+    // So, we need a way to identify such branches in order to emit no instructions for them
+    // or change them to unconditional.
+    static bool IsNop(BranchCondition condition, Register lhs, Register rhs);
+    static bool IsUncond(BranchCondition condition, Register lhs, Register rhs);
+
+    static BranchCondition OppositeCondition(BranchCondition cond);
+
+    Type GetType() const;
+    BranchCondition GetCondition() const;
+    Register GetLeftRegister() const;
+    Register GetRightRegister() const;
+    uint32_t GetTarget() const;
+    uint32_t GetLocation() const;
+    uint32_t GetOldLocation() const;
+    uint32_t GetLength() const;
+    uint32_t GetOldLength() const;
+    uint32_t GetSize() const;
+    uint32_t GetOldSize() const;
+    uint32_t GetEndLocation() const;
+    uint32_t GetOldEndLocation() const;
+    bool IsLong() const;
+    bool IsResolved() const;
+
+    // Returns the bit size of the signed offset that the branch instruction can handle.
+    OffsetBits GetOffsetSize() const;
+
+    // Calculates the distance between two byte locations in the assembler buffer and
+    // returns the number of bits needed to represent the distance as a signed integer.
+    //
+    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+    //
+    // Composite branches (made of several instructions) with longer reach have 32-bit
+    // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
+    // The composite branches cover the range of PC + +/-2GB.
+    //
+    // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
+    // case with the addiu instruction and a 16 bit offset.
+    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target);
+
+    // Relocate a branch by a given delta if needed due to expansion of this or another
+    // branch at a given location by this delta (just changes location_ and target_).
+    void Relocate(uint32_t expand_location, uint32_t delta);
+
+    // If the branch is short, changes its type to long.
+    void PromoteToLong();
+
+    // If necessary, updates the type by promoting a short branch to a long branch
+    // based on the branch location and target. Returns the amount (in bytes) by
+    // which the branch size has increased.
+    // max_short_distance caps the maximum distance between location_ and target_
+    // that is allowed for short branches. This is for debugging/testing purposes.
+    // max_short_distance = 0 forces all short branches to become long.
+    // Use the implicit default argument when not debugging/testing.
+    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+    // Returns the location of the instruction(s) containing the offset.
+    uint32_t GetOffsetLocation() const;
+
+    // Calculates and returns the offset ready for encoding in the branch instruction(s).
+    uint32_t GetOffset() const;
+
+   private:
+    // Completes branch construction by determining and recording its type.
+    void InitializeType(bool is_call, bool is_r6);
+    // Helper for the above.
+    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+    uint32_t old_location_;          // Offset into assembler buffer in bytes.
+    uint32_t location_;              // Offset into assembler buffer in bytes.
+    uint32_t target_;                // Offset into assembler buffer in bytes.
+
+    uint32_t lhs_reg_ : 5;           // Left-hand side register in conditional branches or
+                                     // indirect call register.
+    uint32_t rhs_reg_ : 5;           // Right-hand side register in conditional branches.
+    BranchCondition condition_ : 5;  // Condition for conditional branches.
+
+    Type type_ : 5;                  // Current type of the branch.
+    Type old_type_ : 5;              // Initial type of the branch.
+  };
+  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
   void EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct);
   void EmitI(int opcode, Register rs, Register rt, uint16_t imm);
-  void EmitJ(int opcode, int address);
+  void EmitI21(int opcode, Register rs, uint32_t imm21);
+  void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
+  void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
+  void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);  // R6
 
-  int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump);
-  int DecodeBranchOffset(int32_t inst, bool is_jump);
+  void Buncond(MipsLabel* label);
+  void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
+  void Call(MipsLabel* label, Register indirect_reg);
+  void FinalizeLabeledBranch(MipsLabel* label);
 
-  FRegister ConvertDRegToFReg(DRegister reg) {
-    return static_cast<FRegister>(reg * 2);
+  Branch* GetBranch(uint32_t branch_id);
+  const Branch* GetBranch(uint32_t branch_id) const;
+
+  void PromoteBranches();
+  void EmitBranch(Branch* branch);
+  void EmitBranches();
+
+  // Emits exception block.
+  void EmitExceptionPoll(MipsExceptionSlowPath* exception);
+
+  bool IsR6() const {
+    if (isa_features_ != nullptr) {
+      return isa_features_->IsR6();
+    } else {
+      return false;
+    }
   }
-  Register ConvertDRegToReg(DRegister reg) {
-    return static_cast<Register>(reg * 2);
+
+  bool Is32BitFPU() const {
+    if (isa_features_ != nullptr) {
+      return isa_features_->Is32BitFloatingPoint();
+    } else {
+      return true;
+    }
   }
-  Register ConvertFRegToReg(FRegister reg) {
-    return static_cast<Register>(reg);
-  }
-  FRegister ConvertRegToFReg(Register reg) {
-    return static_cast<FRegister>(reg);
-  }
+
+  // List of exception blocks to generate at the end of the code cache.
+  std::vector<MipsExceptionSlowPath> exception_blocks_;
+
+  std::vector<Branch> branches_;
+
+  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
+  bool overwriting_;
+  // The current overwrite location.
+  uint32_t overwrite_location_;
+
+  // Data for AdjustedPosition(), see the description there.
+  uint32_t last_position_adjustment_;
+  uint32_t last_old_position_;
+  uint32_t last_branch_id_;
+
+  const MipsInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsAssembler);
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class MipsExceptionSlowPath FINAL : public SlowPath {
- public:
-  MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const MipsManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace mips
 }  // namespace art
 
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
new file mode 100644
index 0000000..063d8bd
--- /dev/null
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -0,0 +1,1324 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips.h"
+
+#include <map>
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+namespace art {
+
+struct MIPSCpuRegisterCompare {
+  bool operator()(const mips::Register& a, const mips::Register& b) const {
+    return a < b;
+  }
+};
+
+class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,
+                                               mips::Register,
+                                               mips::FRegister,
+                                               uint32_t> {
+ public:
+  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "mips";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    return " --no-warn -32 -march=mips32r2";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mmips:isa32r2";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.push_back(new mips::Register(mips::ZERO));
+      registers_.push_back(new mips::Register(mips::AT));
+      registers_.push_back(new mips::Register(mips::V0));
+      registers_.push_back(new mips::Register(mips::V1));
+      registers_.push_back(new mips::Register(mips::A0));
+      registers_.push_back(new mips::Register(mips::A1));
+      registers_.push_back(new mips::Register(mips::A2));
+      registers_.push_back(new mips::Register(mips::A3));
+      registers_.push_back(new mips::Register(mips::T0));
+      registers_.push_back(new mips::Register(mips::T1));
+      registers_.push_back(new mips::Register(mips::T2));
+      registers_.push_back(new mips::Register(mips::T3));
+      registers_.push_back(new mips::Register(mips::T4));
+      registers_.push_back(new mips::Register(mips::T5));
+      registers_.push_back(new mips::Register(mips::T6));
+      registers_.push_back(new mips::Register(mips::T7));
+      registers_.push_back(new mips::Register(mips::S0));
+      registers_.push_back(new mips::Register(mips::S1));
+      registers_.push_back(new mips::Register(mips::S2));
+      registers_.push_back(new mips::Register(mips::S3));
+      registers_.push_back(new mips::Register(mips::S4));
+      registers_.push_back(new mips::Register(mips::S5));
+      registers_.push_back(new mips::Register(mips::S6));
+      registers_.push_back(new mips::Register(mips::S7));
+      registers_.push_back(new mips::Register(mips::T8));
+      registers_.push_back(new mips::Register(mips::T9));
+      registers_.push_back(new mips::Register(mips::K0));
+      registers_.push_back(new mips::Register(mips::K1));
+      registers_.push_back(new mips::Register(mips::GP));
+      registers_.push_back(new mips::Register(mips::SP));
+      registers_.push_back(new mips::Register(mips::FP));
+      registers_.push_back(new mips::Register(mips::RA));
+
+      secondary_register_names_.emplace(mips::Register(mips::ZERO), "zero");
+      secondary_register_names_.emplace(mips::Register(mips::AT), "at");
+      secondary_register_names_.emplace(mips::Register(mips::V0), "v0");
+      secondary_register_names_.emplace(mips::Register(mips::V1), "v1");
+      secondary_register_names_.emplace(mips::Register(mips::A0), "a0");
+      secondary_register_names_.emplace(mips::Register(mips::A1), "a1");
+      secondary_register_names_.emplace(mips::Register(mips::A2), "a2");
+      secondary_register_names_.emplace(mips::Register(mips::A3), "a3");
+      secondary_register_names_.emplace(mips::Register(mips::T0), "t0");
+      secondary_register_names_.emplace(mips::Register(mips::T1), "t1");
+      secondary_register_names_.emplace(mips::Register(mips::T2), "t2");
+      secondary_register_names_.emplace(mips::Register(mips::T3), "t3");
+      secondary_register_names_.emplace(mips::Register(mips::T4), "t4");
+      secondary_register_names_.emplace(mips::Register(mips::T5), "t5");
+      secondary_register_names_.emplace(mips::Register(mips::T6), "t6");
+      secondary_register_names_.emplace(mips::Register(mips::T7), "t7");
+      secondary_register_names_.emplace(mips::Register(mips::S0), "s0");
+      secondary_register_names_.emplace(mips::Register(mips::S1), "s1");
+      secondary_register_names_.emplace(mips::Register(mips::S2), "s2");
+      secondary_register_names_.emplace(mips::Register(mips::S3), "s3");
+      secondary_register_names_.emplace(mips::Register(mips::S4), "s4");
+      secondary_register_names_.emplace(mips::Register(mips::S5), "s5");
+      secondary_register_names_.emplace(mips::Register(mips::S6), "s6");
+      secondary_register_names_.emplace(mips::Register(mips::S7), "s7");
+      secondary_register_names_.emplace(mips::Register(mips::T8), "t8");
+      secondary_register_names_.emplace(mips::Register(mips::T9), "t9");
+      secondary_register_names_.emplace(mips::Register(mips::K0), "k0");
+      secondary_register_names_.emplace(mips::Register(mips::K1), "k1");
+      secondary_register_names_.emplace(mips::Register(mips::GP), "gp");
+      secondary_register_names_.emplace(mips::Register(mips::SP), "sp");
+      secondary_register_names_.emplace(mips::Register(mips::FP), "fp");
+      secondary_register_names_.emplace(mips::Register(mips::RA), "ra");
+
+      fp_registers_.push_back(new mips::FRegister(mips::F0));
+      fp_registers_.push_back(new mips::FRegister(mips::F1));
+      fp_registers_.push_back(new mips::FRegister(mips::F2));
+      fp_registers_.push_back(new mips::FRegister(mips::F3));
+      fp_registers_.push_back(new mips::FRegister(mips::F4));
+      fp_registers_.push_back(new mips::FRegister(mips::F5));
+      fp_registers_.push_back(new mips::FRegister(mips::F6));
+      fp_registers_.push_back(new mips::FRegister(mips::F7));
+      fp_registers_.push_back(new mips::FRegister(mips::F8));
+      fp_registers_.push_back(new mips::FRegister(mips::F9));
+      fp_registers_.push_back(new mips::FRegister(mips::F10));
+      fp_registers_.push_back(new mips::FRegister(mips::F11));
+      fp_registers_.push_back(new mips::FRegister(mips::F12));
+      fp_registers_.push_back(new mips::FRegister(mips::F13));
+      fp_registers_.push_back(new mips::FRegister(mips::F14));
+      fp_registers_.push_back(new mips::FRegister(mips::F15));
+      fp_registers_.push_back(new mips::FRegister(mips::F16));
+      fp_registers_.push_back(new mips::FRegister(mips::F17));
+      fp_registers_.push_back(new mips::FRegister(mips::F18));
+      fp_registers_.push_back(new mips::FRegister(mips::F19));
+      fp_registers_.push_back(new mips::FRegister(mips::F20));
+      fp_registers_.push_back(new mips::FRegister(mips::F21));
+      fp_registers_.push_back(new mips::FRegister(mips::F22));
+      fp_registers_.push_back(new mips::FRegister(mips::F23));
+      fp_registers_.push_back(new mips::FRegister(mips::F24));
+      fp_registers_.push_back(new mips::FRegister(mips::F25));
+      fp_registers_.push_back(new mips::FRegister(mips::F26));
+      fp_registers_.push_back(new mips::FRegister(mips::F27));
+      fp_registers_.push_back(new mips::FRegister(mips::F28));
+      fp_registers_.push_back(new mips::FRegister(mips::F29));
+      fp_registers_.push_back(new mips::FRegister(mips::F30));
+      fp_registers_.push_back(new mips::FRegister(mips::F31));
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<mips::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+  std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+ private:
+  std::vector<mips::Register*> registers_;
+  std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
+
+  std::vector<mips::FRegister*> fp_registers_;
+};
+
+
+TEST_F(AssemblerMIPSTest, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+#define __ GetAssembler()->
+
+TEST_F(AssemblerMIPSTest, Addu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu");
+}
+
+TEST_F(AssemblerMIPSTest, Addiu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Addiu, -16, "addiu ${reg1}, ${reg2}, {imm}"), "Addiu");
+}
+
+TEST_F(AssemblerMIPSTest, Subu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Subu, "subu ${reg1}, ${reg2}, ${reg3}"), "Subu");
+}
+
+TEST_F(AssemblerMIPSTest, MultR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::MultR2, "mult ${reg1}, ${reg2}"), "MultR2");
+}
+
+TEST_F(AssemblerMIPSTest, MultuR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::MultuR2, "multu ${reg1}, ${reg2}"), "MultuR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivR2Basic) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::DivR2, "div $zero, ${reg1}, ${reg2}"), "DivR2Basic");
+}
+
+TEST_F(AssemblerMIPSTest, DivuR2Basic) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::DivuR2, "divu $zero, ${reg1}, ${reg2}"), "DivuR2Basic");
+}
+
+TEST_F(AssemblerMIPSTest, MulR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MulR2, "mul ${reg1}, ${reg2}, ${reg3}"), "MulR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivR2, "div $zero, ${reg2}, ${reg3}\nmflo ${reg1}"),
+            "DivR2");
+}
+
+TEST_F(AssemblerMIPSTest, ModR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModR2, "div $zero, ${reg2}, ${reg3}\nmfhi ${reg1}"),
+            "ModR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivuR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivuR2, "divu $zero, ${reg2}, ${reg3}\nmflo ${reg1}"),
+            "DivuR2");
+}
+
+TEST_F(AssemblerMIPSTest, ModuR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModuR2, "divu $zero, ${reg2}, ${reg3}\nmfhi ${reg1}"),
+            "ModuR2");
+}
+
+TEST_F(AssemblerMIPSTest, And) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::And, "and ${reg1}, ${reg2}, ${reg3}"), "And");
+}
+
+TEST_F(AssemblerMIPSTest, Andi) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Andi, 16, "andi ${reg1}, ${reg2}, {imm}"), "Andi");
+}
+
+TEST_F(AssemblerMIPSTest, Or) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Or, "or ${reg1}, ${reg2}, ${reg3}"), "Or");
+}
+
+TEST_F(AssemblerMIPSTest, Ori) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Ori, 16, "ori ${reg1}, ${reg2}, {imm}"), "Ori");
+}
+
+TEST_F(AssemblerMIPSTest, Xor) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Xor, "xor ${reg1}, ${reg2}, ${reg3}"), "Xor");
+}
+
+TEST_F(AssemblerMIPSTest, Xori) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Xori, 16, "xori ${reg1}, ${reg2}, {imm}"), "Xori");
+}
+
+TEST_F(AssemblerMIPSTest, Nor) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "Nor");
+}
+
+TEST_F(AssemblerMIPSTest, Seb) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Seb, "seb ${reg1}, ${reg2}"), "Seb");
+}
+
+TEST_F(AssemblerMIPSTest, Seh) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Seh, "seh ${reg1}, ${reg2}"), "Seh");
+}
+
+TEST_F(AssemblerMIPSTest, Sll) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sll, 5, "sll ${reg1}, ${reg2}, {imm}"), "Sll");
+}
+
+TEST_F(AssemblerMIPSTest, Srl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Srl, 5, "srl ${reg1}, ${reg2}, {imm}"), "Srl");
+}
+
+TEST_F(AssemblerMIPSTest, Sra) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sra, 5, "sra ${reg1}, ${reg2}, {imm}"), "Sra");
+}
+
+TEST_F(AssemblerMIPSTest, Sllv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Sllv, "sllv ${reg1}, ${reg2}, ${reg3}"), "Sllv");
+}
+
+TEST_F(AssemblerMIPSTest, Srlv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "Srlv");
+}
+
+TEST_F(AssemblerMIPSTest, Srav) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
+}
+
+TEST_F(AssemblerMIPSTest, Lb) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
+}
+
+TEST_F(AssemblerMIPSTest, Lh) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "Lh");
+}
+
+TEST_F(AssemblerMIPSTest, Lw) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "Lw");
+}
+
+TEST_F(AssemblerMIPSTest, Lbu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "Lbu");
+}
+
+TEST_F(AssemblerMIPSTest, Lhu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lhu, -16, "lhu ${reg1}, {imm}(${reg2})"), "Lhu");
+}
+
+TEST_F(AssemblerMIPSTest, Lui) {
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Lui, 16, "lui ${reg}, {imm}"), "Lui");
+}
+
+TEST_F(AssemblerMIPSTest, Mfhi) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Mfhi, "mfhi ${reg}"), "Mfhi");
+}
+
+TEST_F(AssemblerMIPSTest, Mflo) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Mflo, "mflo ${reg}"), "Mflo");
+}
+
+TEST_F(AssemblerMIPSTest, Sb) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sb, -16, "sb ${reg1}, {imm}(${reg2})"), "Sb");
+}
+
+TEST_F(AssemblerMIPSTest, Sh) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "Sh");
+}
+
+TEST_F(AssemblerMIPSTest, Sw) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "Sw");
+}
+
+TEST_F(AssemblerMIPSTest, Slt) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "Slt");
+}
+
+TEST_F(AssemblerMIPSTest, Sltu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Sltu, "sltu ${reg1}, ${reg2}, ${reg3}"), "Sltu");
+}
+
+TEST_F(AssemblerMIPSTest, Slti) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Slti, -16, "slti ${reg1}, ${reg2}, {imm}"), "Slti");
+}
+
+TEST_F(AssemblerMIPSTest, Sltiu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sltiu, -16, "sltiu ${reg1}, ${reg2}, {imm}"), "Sltiu");
+}
+
+TEST_F(AssemblerMIPSTest, AddS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::AddS, "add.s ${reg1}, ${reg2}, ${reg3}"), "AddS");
+}
+
+TEST_F(AssemblerMIPSTest, AddD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::AddD, "add.d ${reg1}, ${reg2}, ${reg3}"), "AddD");
+}
+
+TEST_F(AssemblerMIPSTest, SubS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SubS, "sub.s ${reg1}, ${reg2}, ${reg3}"), "SubS");
+}
+
+TEST_F(AssemblerMIPSTest, SubD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SubD, "sub.d ${reg1}, ${reg2}, ${reg3}"), "SubD");
+}
+
+TEST_F(AssemblerMIPSTest, MulS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MulS, "mul.s ${reg1}, ${reg2}, ${reg3}"), "MulS");
+}
+
+TEST_F(AssemblerMIPSTest, MulD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MulD, "mul.d ${reg1}, ${reg2}, ${reg3}"), "MulD");
+}
+
+TEST_F(AssemblerMIPSTest, DivS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::DivS, "div.s ${reg1}, ${reg2}, ${reg3}"), "DivS");
+}
+
+TEST_F(AssemblerMIPSTest, DivD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::DivD, "div.d ${reg1}, ${reg2}, ${reg3}"), "DivD");
+}
+
+TEST_F(AssemblerMIPSTest, MovS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::MovS, "mov.s ${reg1}, ${reg2}"), "MovS");
+}
+
+TEST_F(AssemblerMIPSTest, MovD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::MovD, "mov.d ${reg1}, ${reg2}"), "MovD");
+}
+
+TEST_F(AssemblerMIPSTest, NegS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::NegS, "neg.s ${reg1}, ${reg2}"), "NegS");
+}
+
+TEST_F(AssemblerMIPSTest, NegD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
+}
+
+TEST_F(AssemblerMIPSTest, CvtSW) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDW) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
+}
+
+TEST_F(AssemblerMIPSTest, CvtSD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
+}
+
+TEST_F(AssemblerMIPSTest, Mfc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mtc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mfhc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mthc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPSTest, Lwc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Lwc1, -16, "lwc1 ${reg1}, {imm}(${reg2})"), "Lwc1");
+}
+
+TEST_F(AssemblerMIPSTest, Ldc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Ldc1, -16, "ldc1 ${reg1}, {imm}(${reg2})"), "Ldc1");
+}
+
+TEST_F(AssemblerMIPSTest, Swc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Swc1, -16, "swc1 ${reg1}, {imm}(${reg2})"), "Swc1");
+}
+
+TEST_F(AssemblerMIPSTest, Sdc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Sdc1, -16, "sdc1 ${reg1}, {imm}(${reg2})"), "Sdc1");
+}
+
+TEST_F(AssemblerMIPSTest, Move) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Move, "or ${reg1}, ${reg2}, $zero"), "Move");
+}
+
+TEST_F(AssemblerMIPSTest, Clear) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Clear, "or ${reg}, $zero, $zero"), "Clear");
+}
+
+TEST_F(AssemblerMIPSTest, Not) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Not, "nor ${reg1}, ${reg2}, $zero"), "Not");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFromOffset) {
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A1, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 256);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 1000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x10000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x12345678);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -256);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+
+  const char* expected =
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lw $a1, 4($a0)\n"
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a1, 4($a1)\n"
+      "lw $a1, 0($a0)\n"
+      "lw $a2, 4($a0)\n"
+      "lw $a0, 0($a2)\n"
+      "lw $a1, 4($a2)\n"
+      "lw $a0, 256($a2)\n"
+      "lw $a1, 260($a2)\n"
+      "lw $a0, 1000($a2)\n"
+      "lw $a1, 1004($a2)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lw $a0, -256($a2)\n"
+      "lw $a1, -252($a2)\n"
+      "lw $a0, 0xFFFF8000($a2)\n"
+      "lw $a1, 0xFFFF8004($a2)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPSTest, LoadSFromOffset) {
+  __ LoadSFromOffset(mips::F0, mips::A0, 0);
+  __ LoadSFromOffset(mips::F0, mips::A0, 4);
+  __ LoadSFromOffset(mips::F0, mips::A0, 256);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x8000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x10000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x12345678);
+  __ LoadSFromOffset(mips::F0, mips::A0, -256);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadSFromOffset");
+}
+
+
+TEST_F(AssemblerMIPSTest, LoadDFromOffset) {
+  __ LoadDFromOffset(mips::F0, mips::A0, 0);
+  __ LoadDFromOffset(mips::F0, mips::A0, 4);
+  __ LoadDFromOffset(mips::F0, mips::A0, 256);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x10000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x12345678);
+  __ LoadDFromOffset(mips::F0, mips::A0, -256);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f1, 8($a0)\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadDFromOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreToOffset) {
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 256);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 1000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x10000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x12345678);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -256);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+
+  const char* expected =
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sw $a0, 0($a2)\n"
+      "sw $a1, 4($a2)\n"
+      "sw $a0, 256($a2)\n"
+      "sw $a1, 260($a2)\n"
+      "sw $a0, 1000($a2)\n"
+      "sw $a1, 1004($a2)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "sw $a0, -256($a2)\n"
+      "sw $a1, -252($a2)\n"
+      "sw $a0, 0xFFFF8000($a2)\n"
+      "sw $a1, 0xFFFF8004($a2)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreSToOffset) {
+  __ StoreSToOffset(mips::F0, mips::A0, 0);
+  __ StoreSToOffset(mips::F0, mips::A0, 4);
+  __ StoreSToOffset(mips::F0, mips::A0, 256);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x8000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x10000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x12345678);
+  __ StoreSToOffset(mips::F0, mips::A0, -256);
+  __ StoreSToOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreSToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreDToOffset) {
+  __ StoreDToOffset(mips::F0, mips::A0, 0);
+  __ StoreDToOffset(mips::F0, mips::A0, 4);
+  __ StoreDToOffset(mips::F0, mips::A0, 256);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x10000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x12345678);
+  __ StoreDToOffset(mips::F0, mips::A0, -256);
+  __ StoreDToOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "sdc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f1, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreDToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, B) {
+  mips::MipsLabel label1, label2;
+  __ B(&label1);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ B(&label2);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ B(&label1);
+
+  std::string expected =
+      ".set noreorder\n"
+      "b 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "b 2f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "b 1b\n"
+      "nop\n";
+  DriverStr(expected, "B");
+}
+
+TEST_F(AssemblerMIPSTest, Beq) {
+  mips::MipsLabel label;
+  __ Beq(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Beq(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $a0, $a1, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "beq $a2, $a3, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Beq");
+}
+
+TEST_F(AssemblerMIPSTest, Bne) {
+  mips::MipsLabel label;
+  __ Bne(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bne(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bne $a0, $a1, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bne $a2, $a3, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bne");
+}
+
+TEST_F(AssemblerMIPSTest, Beqz) {
+  mips::MipsLabel label;
+  __ Beqz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Beqz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $zero, $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "beq $zero, $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Beqz");
+}
+
+TEST_F(AssemblerMIPSTest, Bnez) {
+  mips::MipsLabel label;
+  __ Bnez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bnez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bne $zero, $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bne $zero, $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bnez");
+}
+
+TEST_F(AssemblerMIPSTest, Bltz) {
+  mips::MipsLabel label;
+  __ Bltz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bltz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltz $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bltz $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bltz");
+}
+
+TEST_F(AssemblerMIPSTest, Bgez) {
+  mips::MipsLabel label;
+  __ Bgez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bgez $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bgez $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgez");
+}
+
+TEST_F(AssemblerMIPSTest, Blez) {
+  mips::MipsLabel label;
+  __ Blez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Blez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "blez $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "blez $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Blez");
+}
+
+TEST_F(AssemblerMIPSTest, Bgtz) {
+  mips::MipsLabel label;
+  __ Bgtz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgtz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bgtz $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bgtz $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgtz");
+}
+
+TEST_F(AssemblerMIPSTest, Blt) {
+  mips::MipsLabel label;
+  __ Blt(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Blt(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "slt $at, $a0, $a1\n"
+      "bne $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "slt $at, $a2, $a3\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Blt");
+}
+
+TEST_F(AssemblerMIPSTest, Bge) {
+  mips::MipsLabel label;
+  __ Bge(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bge(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "slt $at, $a0, $a1\n"
+      "beq $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "slt $at, $a2, $a3\n"
+      "beq $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bge");
+}
+
+TEST_F(AssemblerMIPSTest, Bltu) {
+  mips::MipsLabel label;
+  __ Bltu(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bltu(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "sltu $at, $a0, $a1\n"
+      "bne $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "sltu $at, $a2, $a3\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bltu");
+}
+
+TEST_F(AssemblerMIPSTest, Bgeu) {
+  mips::MipsLabel label;
+  __ Bgeu(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgeu(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "sltu $at, $a0, $a1\n"
+      "beq $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "sltu $at, $a2, $a3\n"
+      "beq $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgeu");
+}
+
+#undef __
+
+}  // namespace art
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index c55d285..faa2d2d 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -280,6 +280,7 @@
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
+  { kJTypeMask, 50u << kOpcodeShift, "bc", "P" },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
   { kITypeMask | (0x1f << 21), 54u << kOpcodeShift, "jic", "Ti" },
   { kITypeMask | (1 << 21), (54u << kOpcodeShift) | (1 << 21), "beqzc", "Sb" },  // TODO: de-dup?
@@ -290,6 +291,7 @@
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -432,6 +434,22 @@
               }
             }
             break;
+          case 'P':  // 26-bit offset in bc.
+            {
+              int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
+              offset <<= 2;
+              offset += 4;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'p':  // 19-bit offset in addiupc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              args << offset << "  ; move r" << rs << ", ";
+              args << FormatInstructionPointer(instr_ptr + (offset << 2));
+            }
+            break;
           case 'S': args << 'r' << rs; break;
           case 's': args << 'f' << rs; break;
           case 'T': args << 'r' << rt; break;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b9d81a7..f5f7748 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "arch/instruction_set_features.h"
+#include "art_code.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -54,6 +55,7 @@
 #include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
+#include "stack_map.h"
 #include "ScopedLocalRef.h"
 #include "thread_list.h"
 #include "verifier/dex_gc_map.h"
@@ -1961,24 +1963,27 @@
     DCHECK(method != nullptr);
     const auto image_pointer_size =
         InstructionSetPointerSize(state->oat_dumper_->GetOatInstructionSet());
+    const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
+    const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
+    ArtCode art_code(method);
     if (method->IsNative()) {
-      DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
-      DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
+      DCHECK(art_code.GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
+      DCHECK(art_code.GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
       bool first_occurrence;
-      const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
       uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
-      state->ComputeOatSize(quick_oat_code, &first_occurrence);
+      state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
       if (first_occurrence) {
         state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
       }
-      if (quick_oat_code != method->GetEntryPointFromQuickCompiledCodePtrSize(image_pointer_size)) {
-        indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code);
+      if (quick_oat_code_begin !=
+            method->GetEntryPointFromQuickCompiledCodePtrSize(image_pointer_size)) {
+        indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code_begin);
       }
     } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
       method->IsResolutionMethod() || method->IsImtConflictMethod() ||
       method->IsImtUnimplementedMethod() || method->IsClassInitializer()) {
-      DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
-      DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
+      DCHECK(art_code.GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
+      DCHECK(art_code.GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
     } else {
       const DexFile::CodeItem* code_item = method->GetCodeItem();
       size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
@@ -1986,29 +1991,27 @@
 
       bool first_occurrence;
       size_t gc_map_bytes = state->ComputeOatSize(
-          method->GetNativeGcMap(image_pointer_size), &first_occurrence);
+          art_code.GetNativeGcMap(image_pointer_size), &first_occurrence);
       if (first_occurrence) {
         state->stats_.gc_map_bytes += gc_map_bytes;
       }
 
       size_t pc_mapping_table_bytes = state->ComputeOatSize(
-          method->GetMappingTable(image_pointer_size), &first_occurrence);
+          art_code.GetMappingTable(image_pointer_size), &first_occurrence);
       if (first_occurrence) {
         state->stats_.pc_mapping_table_bytes += pc_mapping_table_bytes;
       }
 
       size_t vmap_table_bytes = 0u;
-      if (!method->IsOptimized(image_pointer_size)) {
+      if (!art_code.IsOptimized(image_pointer_size)) {
         // Method compiled with the optimizing compiler have no vmap table.
         vmap_table_bytes = state->ComputeOatSize(
-            method->GetVmapTable(image_pointer_size), &first_occurrence);
+            art_code.GetVmapTable(image_pointer_size), &first_occurrence);
         if (first_occurrence) {
           state->stats_.vmap_table_bytes += vmap_table_bytes;
         }
       }
 
-      const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
-      const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
       uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
       state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
       if (first_occurrence) {
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 2eb5db1..8fe3fa2 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -19,6 +19,7 @@
 include art/build/Android.common_build.mk
 
 LIBART_COMMON_SRC_FILES := \
+  art_code.cc \
   art_field.cc \
   art_method.cc \
   atomic.cc.arm \
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 4a45f49..e676a09 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -39,7 +39,7 @@
     runtime->SetInstructionSet(isa);
     ArtMethod* save_method = runtime->CreateCalleeSaveMethod();
     runtime->SetCalleeSaveMethod(save_method, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    QuickMethodFrameInfo frame_info = ArtCode(save_method).GetQuickFrameInfo();
     EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
         << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
         << frame_info.FpSpillMask() << std::dec;
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 8f6b1ff..d5c7846 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -38,8 +38,8 @@
 }
 
 void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode art_code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = art_code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 4477631..cdc03fe 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -40,8 +40,8 @@
 }
 
 void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 08ab356..dba62d9 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -38,8 +38,8 @@
 }
 
 void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index 2c17f1c..d808c9e 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -38,8 +38,8 @@
 }
 
 void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 987ad60..0d88dd0 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -16,9 +16,10 @@
 
 #include "context_x86.h"
 
-#include "art_method-inl.h"
+#include "art_code.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
+#include "stack.h"
 
 namespace art {
 namespace x86 {
@@ -37,8 +38,8 @@
 }
 
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 3dc7d71..12c94bc 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -16,9 +16,10 @@
 
 #include "context_x86_64.h"
 
-#include "art_method-inl.h"
+#include "art_code.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
+#include "stack.h"
 
 namespace art {
 namespace x86_64 {
@@ -37,8 +38,8 @@
 }
 
 void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  ArtCode code = fr.GetCurrentCode();
+  const QuickMethodFrameInfo frame_info = code.GetQuickFrameInfo();
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
diff --git a/runtime/art_code.cc b/runtime/art_code.cc
new file mode 100644
index 0000000..b999ec8
--- /dev/null
+++ b/runtime/art_code.cc
@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_code.h"
+
+#include "art_method.h"
+#include "art_method-inl.h"
+#include "class_linker.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
+#include "handle_scope.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "mapping_table.h"
+#include "oat.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+
+  // Converts a dex PC to a native PC.
+uintptr_t ArtCode::ToNativeQuickPc(const uint32_t dex_pc,
+                                   bool is_for_catch_handler,
+                                   bool abort_on_failure)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  if (IsOptimized(sizeof(void*))) {
+    // Optimized code does not have a mapping table. Search for the dex-to-pc
+    // mapping in stack maps.
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+
+    // All stack maps are stored in the same CodeItem section, safepoint stack
+    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
+    // the order of iteration.
+    StackMap stack_map =
+        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
+                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
+    if (stack_map.IsValid()) {
+      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
+    }
+  } else {
+    MappingTable table((entry_point != nullptr) ? GetMappingTable(sizeof(void*)) : nullptr);
+    if (table.TotalSize() == 0) {
+      DCHECK_EQ(dex_pc, 0U);
+      return 0;   // Special no mapping/pc == 0 case
+    }
+    // Assume the caller wants a dex-to-pc mapping so check here first.
+    typedef MappingTable::DexToPcIterator It;
+    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
+    }
+    // Now check pc-to-dex mappings.
+    typedef MappingTable::PcToDexIterator It2;
+    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
+    }
+  }
+
+  if (abort_on_failure) {
+    LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
+               << " in " << PrettyMethod(method_);
+  }
+  return UINTPTR_MAX;
+}
+
+bool ArtCode::IsOptimized(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Temporary solution for detecting if a method has been optimized: the compiler
+  // does not create a GC map. Instead, the vmap table contains the stack map
+  // (as in stack_map.h).
+  return !method_->IsNative()
+      && method_->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
+      && GetQuickOatEntryPoint(pointer_size) != nullptr
+      && GetNativeGcMap(pointer_size) == nullptr;
+}
+
+CodeInfo ArtCode::GetOptimizedCodeInfo() {
+  DCHECK(IsOptimized(sizeof(void*)));
+  const void* code_pointer = EntryPointToCodePointer(GetQuickOatEntryPoint(sizeof(void*)));
+  DCHECK(code_pointer != nullptr);
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
+  const void* data =
+      reinterpret_cast<const void*>(reinterpret_cast<const uint8_t*>(code_pointer) - offset);
+  return CodeInfo(data);
+}
+
+uintptr_t ArtCode::NativeQuickPcOffset(const uintptr_t pc) {
+  const void* quick_entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  CHECK_NE(quick_entry_point, GetQuickToInterpreterBridge());
+  CHECK_EQ(quick_entry_point,
+           Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_, sizeof(void*)));
+  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
+}
+
+uint32_t ArtCode::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
+  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
+  if (IsOptimized(sizeof(void*))) {
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+    StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
+    if (stack_map.IsValid()) {
+      return stack_map.GetDexPc(encoding);
+    }
+  } else {
+    MappingTable table(entry_point != nullptr ? GetMappingTable(sizeof(void*)) : nullptr);
+    if (table.TotalSize() == 0) {
+      // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+      // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
+      DCHECK(method_->IsNative() || method_->IsCalleeSaveMethod() || method_->IsProxyMethod())
+          << PrettyMethod(method_);
+      return DexFile::kDexNoIndex;   // Special no mapping case
+    }
+    // Assume the caller wants a pc-to-dex mapping so check here first.
+    typedef MappingTable::PcToDexIterator It;
+    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
+    }
+    // Now check dex-to-pc mappings.
+    typedef MappingTable::DexToPcIterator It2;
+    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
+    }
+  }
+  if (abort_on_failure) {
+      LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
+             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
+             << " current entry_point=" << GetQuickOatEntryPoint(sizeof(void*))
+             << ") in " << PrettyMethod(method_);
+  }
+  return DexFile::kDexNoIndex;
+}
+
+const uint8_t* ArtCode::GetNativeGcMap(size_t pointer_size) {
+  const void* code_pointer = EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].gc_map_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+const uint8_t* ArtCode::GetVmapTable(size_t pointer_size) {
+  CHECK(!IsOptimized(pointer_size)) << "Unimplemented vmap table for optimized compiler";
+  const void* code_pointer = EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+const uint8_t* ArtCode::GetMappingTable(size_t pointer_size) {
+  const void* code_pointer = EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+// Counts the number of references in the parameter list of the corresponding method.
+// Note: Thus does _not_ include "this" for non-static methods.
+static uint32_t GetNumberOfReferenceArgsWithoutReceiver(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t shorty_len;
+  const char* shorty = method->GetShorty(&shorty_len);
+  uint32_t refs = 0;
+  for (uint32_t i = 1; i < shorty_len ; ++i) {
+    if (shorty[i] == 'L') {
+      refs++;
+    }
+  }
+  return refs;
+}
+
+QuickMethodFrameInfo ArtCode::GetQuickFrameInfo() {
+  Runtime* runtime = Runtime::Current();
+
+  if (UNLIKELY(method_->IsAbstract())) {
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  }
+
+  // This goes before IsProxyMethod since runtime methods have a null declaring class.
+  if (UNLIKELY(method_->IsRuntimeMethod())) {
+    return runtime->GetRuntimeMethodFrameInfo(method_);
+  }
+
+  // For Proxy method we add special handling for the direct method case  (there is only one
+  // direct method - constructor). Direct method is cloned from original
+  // java.lang.reflect.Proxy class together with code and as a result it is executed as usual
+  // quick compiled method without any stubs. So the frame info should be returned as it is a
+  // quick method not a stub. However, if instrumentation stubs are installed, the
+  // instrumentation->GetQuickCodeFor() returns the artQuickProxyInvokeHandler instead of an
+  // oat code pointer, thus we have to add a special case here.
+  if (UNLIKELY(method_->IsProxyMethod())) {
+    if (method_->IsDirect()) {
+      CHECK(method_->IsConstructor());
+      const void* code_pointer =
+          EntryPointToCodePointer(method_->GetEntryPointFromQuickCompiledCode());
+      return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
+    } else {
+      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    }
+  }
+
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method_, sizeof(void*));
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
+  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
+  // for non-native methods. And we really shouldn't see a failure for non-native methods here.
+  DCHECK(!class_linker->IsQuickToInterpreterBridge(entry_point));
+
+  if (class_linker->IsQuickGenericJniStub(entry_point)) {
+    // Generic JNI frame.
+    DCHECK(method_->IsNative());
+    uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method_) + 1;
+    size_t scope_size = HandleScope::SizeOf(handle_refs);
+    QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+
+    // Callee saves + handle scope + method ref + alignment
+    // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
+    size_t frame_size = RoundUp(callee_info.FrameSizeInBytes() - sizeof(void*) +
+                                sizeof(ArtMethod*) + scope_size, kStackAlignment);
+    return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
+  }
+
+  const void* code_pointer = EntryPointToCodePointer(entry_point);
+  return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
+}
+
+void ArtCode::AssertPcIsWithinQuickCode(uintptr_t pc) {
+  if (method_->IsNative() || method_->IsRuntimeMethod() || method_->IsProxyMethod()) {
+    return;
+  }
+  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
+    return;
+  }
+  const void* code = method_->GetEntryPointFromQuickCompiledCode();
+  if (code == GetQuickInstrumentationEntryPoint()) {
+    return;
+  }
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  if (class_linker->IsQuickToInterpreterBridge(code) ||
+      class_linker->IsQuickResolutionStub(code)) {
+    return;
+  }
+  // If we are the JIT then we may have just compiled the method after the
+  // IsQuickToInterpreterBridge check.
+  jit::Jit* const jit = Runtime::Current()->GetJit();
+  if (jit != nullptr &&
+      jit->GetCodeCache()->ContainsCodePtr(reinterpret_cast<const void*>(code))) {
+    return;
+  }
+
+  uint32_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  CHECK(PcIsWithinQuickCode(pc))
+      << PrettyMethod(method_)
+      << " pc=" << std::hex << pc
+      << " code=" << code
+      << " size=" << code_size;
+}
+
+bool ArtCode::PcIsWithinQuickCode(uintptr_t pc) {
+  /*
+   * During a stack walk, a return PC may point past-the-end of the code
+   * in the case that the last instruction is a call that isn't expected to
+   * return.  Thus, we check <= code + GetCodeSize().
+   *
+   * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
+   */
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method_->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
+
+const void* ArtCode::GetQuickOatEntryPoint(size_t pointer_size) {
+  if (method_->IsAbstract() || method_->IsRuntimeMethod() || method_->IsProxyMethod()) {
+    return nullptr;
+  }
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(method_, pointer_size);
+  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
+  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
+  // for non-native methods.
+  if (class_linker->IsQuickToInterpreterBridge(code) ||
+      class_linker->IsQuickGenericJniStub(code)) {
+    return nullptr;
+  }
+  return code;
+}
+
+}  // namespace art
diff --git a/runtime/art_code.h b/runtime/art_code.h
new file mode 100644
index 0000000..1d2d898
--- /dev/null
+++ b/runtime/art_code.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ART_CODE_H_
+#define ART_RUNTIME_ART_CODE_H_
+
+#include "base/mutex.h"
+#include "offsets.h"
+#include "quick/quick_method_frame_info.h"
+#include "stack_map.h"
+
+namespace art {
+
+class ArtMethod;
+
+class ArtCode FINAL {
+ public:
+  explicit ArtCode(ArtMethod** method) : method_(*method) {}
+  explicit ArtCode(ArtMethod* method) : method_(method) {}
+  ArtCode() : method_(nullptr) {}
+
+  // Converts a dex PC to a native PC.
+  uintptr_t ToNativeQuickPc(const uint32_t dex_pc,
+                            bool is_for_catch_handler,
+                            bool abort_on_failure = true)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool IsOptimized(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  CodeInfo GetOptimizedCodeInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uintptr_t NativeQuickPcOffset(const uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Converts a native PC to a dex PC.
+  uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
+  const uint8_t* GetNativeGcMap(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  const uint8_t* GetVmapTable(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  const uint8_t* GetMappingTable(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  QuickMethodFrameInfo GetQuickFrameInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  FrameOffset GetReturnPcOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return FrameOffset(GetFrameSizeInBytes() - sizeof(void*));
+  }
+
+  template <bool kCheckFrameSize = true>
+  uint32_t GetFrameSizeInBytes() SHARED_REQUIRES(Locks::mutator_lock_) {
+    uint32_t result = GetQuickFrameInfo().FrameSizeInBytes();
+    if (kCheckFrameSize) {
+      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
+    }
+    return result;
+  }
+
+  const void* GetQuickOatEntryPoint(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool PcIsWithinQuickCode(uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  FrameOffset GetHandleScopeOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
+    constexpr size_t handle_scope_offset = sizeof(ArtMethod*);
+    DCHECK_LT(handle_scope_offset, GetFrameSizeInBytes());
+    return FrameOffset(handle_scope_offset);
+  }
+
+  ArtMethod* GetMethod() const { return method_; }
+
+ private:
+  ArtMethod* method_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ART_CODE_H_
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 632a50f..c415073 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -212,18 +212,6 @@
   return type;
 }
 
-inline uint32_t ArtMethod::GetCodeSize() {
-  DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
-  return GetCodeSize(EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode()));
-}
-
-inline uint32_t ArtMethod::GetCodeSize(const void* code) {
-  if (code == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-}
-
 inline bool ArtMethod::CheckIncompatibleClassChange(InvokeType type) {
   switch (type) {
     case kStatic:
@@ -248,85 +236,6 @@
   }
 }
 
-inline uint32_t ArtMethod::GetQuickOatCodeOffset() {
-  DCHECK(!Runtime::Current()->IsStarted());
-  return PointerToLowMemUInt32(GetEntryPointFromQuickCompiledCode());
-}
-
-inline void ArtMethod::SetQuickOatCodeOffset(uint32_t code_offset) {
-  DCHECK(!Runtime::Current()->IsStarted());
-  SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(code_offset));
-}
-
-inline const uint8_t* ArtMethod::GetMappingTable(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetMappingTable(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer, size_t pointer_size) {
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
-inline const uint8_t* ArtMethod::GetVmapTable(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetVmapTable(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer, size_t pointer_size) {
-  CHECK(!IsOptimized(pointer_size)) << "Unimplemented vmap table for optimized compiler";
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
-inline CodeInfo ArtMethod::GetOptimizedCodeInfo() {
-  DCHECK(IsOptimized(sizeof(void*)));
-  const void* code_pointer = GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
-  const void* data =
-      reinterpret_cast<const void*>(reinterpret_cast<const uint8_t*>(code_pointer) - offset);
-  return CodeInfo(data);
-}
-
-inline const uint8_t* ArtMethod::GetNativeGcMap(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetNativeGcMap(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetNativeGcMap(const void* code_pointer, size_t pointer_size) {
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].gc_map_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
 inline bool ArtMethod::IsRuntimeMethod() {
   return dex_method_index_ == DexFile::kDexNoIndex;
 }
@@ -367,20 +276,6 @@
   return result;
 }
 
-inline uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(
-      this, sizeof(void*));
-  return pc - reinterpret_cast<uintptr_t>(code);
-}
-
-inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
-  DCHECK(code_pointer != nullptr);
-  if (kIsDebugBuild && !IsProxyMethod()) {
-    CHECK_EQ(code_pointer, GetQuickOatCodePointer(sizeof(void*)));
-  }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
-}
-
 inline const DexFile* ArtMethod::GetDexFile() {
   return GetDexCache()->GetDexFile();
 }
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 92648b9..f9d9077 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -180,98 +180,6 @@
   return DexFile::kDexNoIndex;
 }
 
-uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
-  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
-  if (IsOptimized(sizeof(void*))) {
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
-    StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
-    if (stack_map.IsValid()) {
-      return stack_map.GetDexPc(encoding);
-    }
-  } else {
-    MappingTable table(entry_point != nullptr ?
-        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-    if (table.TotalSize() == 0) {
-      // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
-      // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
-      DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
-      return DexFile::kDexNoIndex;   // Special no mapping case
-    }
-    // Assume the caller wants a pc-to-dex mapping so check here first.
-    typedef MappingTable::PcToDexIterator It;
-    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-    // Now check dex-to-pc mappings.
-    typedef MappingTable::DexToPcIterator It2;
-    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-  }
-  if (abort_on_failure) {
-      LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
-             << " current entry_point=" << GetQuickOatEntryPoint(sizeof(void*))
-             << ") in " << PrettyMethod(this);
-  }
-  return DexFile::kDexNoIndex;
-}
-
-uintptr_t ArtMethod::ToNativeQuickPc(const uint32_t dex_pc,
-                                     bool is_for_catch_handler,
-                                     bool abort_on_failure) {
-  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
-  if (IsOptimized(sizeof(void*))) {
-    // Optimized code does not have a mapping table. Search for the dex-to-pc
-    // mapping in stack maps.
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
-
-    // All stack maps are stored in the same CodeItem section, safepoint stack
-    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
-    // the order of iteration.
-    StackMap stack_map =
-        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
-                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
-    if (stack_map.IsValid()) {
-      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
-    }
-  } else {
-    MappingTable table(entry_point != nullptr ?
-        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-    if (table.TotalSize() == 0) {
-      DCHECK_EQ(dex_pc, 0U);
-      return 0;   // Special no mapping/pc == 0 case
-    }
-    // Assume the caller wants a dex-to-pc mapping so check here first.
-    typedef MappingTable::DexToPcIterator It;
-    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-    // Now check pc-to-dex mappings.
-    typedef MappingTable::PcToDexIterator It2;
-    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-  }
-
-  if (abort_on_failure) {
-    LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
-               << " in " << PrettyMethod(this);
-  }
-  return UINTPTR_MAX;
-}
-
 uint32_t ArtMethod::FindCatchBlock(Handle<mirror::Class> exception_type,
                                    uint32_t dex_pc, bool* has_no_move_exception) {
   const DexFile::CodeItem* code_item = GetCodeItem();
@@ -322,76 +230,6 @@
   return found_dex_pc;
 }
 
-void ArtMethod::AssertPcIsWithinQuickCode(uintptr_t pc) {
-  if (IsNative() || IsRuntimeMethod() || IsProxyMethod()) {
-    return;
-  }
-  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
-    return;
-  }
-  const void* code = GetEntryPointFromQuickCompiledCode();
-  if (code == GetQuickInstrumentationEntryPoint()) {
-    return;
-  }
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  if (class_linker->IsQuickToInterpreterBridge(code) ||
-      class_linker->IsQuickResolutionStub(code)) {
-    return;
-  }
-  // If we are the JIT then we may have just compiled the method after the
-  // IsQuickToInterpreterBridge check.
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr &&
-      jit->GetCodeCache()->ContainsCodePtr(reinterpret_cast<const void*>(code))) {
-    return;
-  }
-  /*
-   * During a stack walk, a return PC may point past-the-end of the code
-   * in the case that the last instruction is a call that isn't expected to
-   * return.  Thus, we check <= code + GetCodeSize().
-   *
-   * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
-   */
-  CHECK(PcIsWithinQuickCode(reinterpret_cast<uintptr_t>(code), pc))
-      << PrettyMethod(this)
-      << " pc=" << std::hex << pc
-      << " code=" << code
-      << " size=" << GetCodeSize(
-          EntryPointToCodePointer(reinterpret_cast<const void*>(code)));
-}
-
-bool ArtMethod::IsEntrypointInterpreter() {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* oat_quick_code = class_linker->GetOatMethodQuickCodeFor(this);
-  return oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode();
-}
-
-const void* ArtMethod::GetQuickOatEntryPoint(size_t pointer_size) {
-  if (IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
-    return nullptr;
-  }
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(this, pointer_size);
-  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
-  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
-  // for non-native methods.
-  if (class_linker->IsQuickToInterpreterBridge(code) ||
-      class_linker->IsQuickGenericJniStub(code)) {
-    return nullptr;
-  }
-  return code;
-}
-
-#ifndef NDEBUG
-uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point) {
-  CHECK_NE(quick_entry_point, GetQuickToInterpreterBridge());
-  CHECK_EQ(quick_entry_point,
-           Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*)));
-  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
-}
-#endif
-
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
                        const char* shorty) {
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
@@ -435,8 +273,9 @@
 
       // Ensure that we won't be accidentally calling quick compiled code when -Xint.
       if (kIsDebugBuild && runtime->GetInstrumentation()->IsForcedInterpretOnly()) {
-        DCHECK(!runtime->UseJit());
-        CHECK(IsEntrypointInterpreter())
+        CHECK(!runtime->UseJit());
+        const void* oat_quick_code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(this);
+        CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
             << "Don't call compiled code when -Xint " << PrettyMethod(this);
       }
 
@@ -480,74 +319,6 @@
   self->PopManagedStackFragment(fragment);
 }
 
-// Counts the number of references in the parameter list of the corresponding method.
-// Note: Thus does _not_ include "this" for non-static methods.
-static uint32_t GetNumberOfReferenceArgsWithoutReceiver(ArtMethod* method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t shorty_len;
-  const char* shorty = method->GetShorty(&shorty_len);
-  uint32_t refs = 0;
-  for (uint32_t i = 1; i < shorty_len ; ++i) {
-    if (shorty[i] == 'L') {
-      refs++;
-    }
-  }
-  return refs;
-}
-
-QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo() {
-  Runtime* runtime = Runtime::Current();
-
-  if (UNLIKELY(IsAbstract())) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  }
-
-  // This goes before IsProxyMethod since runtime methods have a null declaring class.
-  if (UNLIKELY(IsRuntimeMethod())) {
-    return runtime->GetRuntimeMethodFrameInfo(this);
-  }
-
-  // For Proxy method we add special handling for the direct method case  (there is only one
-  // direct method - constructor). Direct method is cloned from original
-  // java.lang.reflect.Proxy class together with code and as a result it is executed as usual
-  // quick compiled method without any stubs. So the frame info should be returned as it is a
-  // quick method not a stub. However, if instrumentation stubs are installed, the
-  // instrumentation->GetQuickCodeFor() returns the artQuickProxyInvokeHandler instead of an
-  // oat code pointer, thus we have to add a special case here.
-  if (UNLIKELY(IsProxyMethod())) {
-    if (IsDirect()) {
-      CHECK(IsConstructor());
-      return GetQuickFrameInfo(EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode()));
-    } else {
-      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-    }
-  }
-
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*));
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
-  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
-  // for non-native methods. And we really shouldn't see a failure for non-native methods here.
-  DCHECK(!class_linker->IsQuickToInterpreterBridge(entry_point));
-
-  if (class_linker->IsQuickGenericJniStub(entry_point)) {
-    // Generic JNI frame.
-    DCHECK(IsNative());
-    uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(this) + 1;
-    size_t scope_size = HandleScope::SizeOf(handle_refs);
-    QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-
-    // Callee saves + handle scope + method ref + alignment
-    // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
-    size_t frame_size = RoundUp(callee_info.FrameSizeInBytes() - sizeof(void*) +
-                                sizeof(ArtMethod*) + scope_size, kStackAlignment);
-    return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
-  }
-
-  const void* code_pointer = EntryPointToCodePointer(entry_point);
-  return GetQuickFrameInfo(code_pointer);
-}
-
 void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
   CHECK(IsNative()) << PrettyMethod(this);
   CHECK(!IsFastNative()) << PrettyMethod(this);
@@ -590,16 +361,6 @@
   return true;
 }
 
-const uint8_t* ArtMethod::GetQuickenedInfo() {
-  bool found = false;
-  OatFile::OatMethod oat_method =
-      Runtime::Current()->GetClassLinker()->FindOatMethodFor(this, &found);
-  if (!found || (oat_method.GetQuickCode() != nullptr)) {
-    return nullptr;
-  }
-  return oat_method.GetVmapTable();
-}
-
 ProfilingInfo* ArtMethod::CreateProfilingInfo() {
   DCHECK(!Runtime::Current()->IsAotCompiler());
   ProfilingInfo* info = ProfilingInfo::Create(this);
@@ -613,4 +374,14 @@
   }
 }
 
+const uint8_t* ArtMethod::GetQuickenedInfo() {
+  bool found = false;
+  OatFile::OatMethod oat_method =
+      Runtime::Current()->GetClassLinker()->FindOatMethodFor(this, &found);
+  if (!found || (oat_method.GetQuickCode() != nullptr)) {
+    return nullptr;
+  }
+  return oat_method.GetVmapTable();
+}
+
 }  // namespace art
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0315c3a..3c58644 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_ART_METHOD_H_
 #define ART_RUNTIME_ART_METHOD_H_
 
+#include "base/bit_utils.h"
 #include "base/casts.h"
 #include "dex_file.h"
 #include "gc_root.h"
@@ -24,10 +25,8 @@
 #include "method_reference.h"
 #include "modifiers.h"
 #include "mirror/object.h"
-#include "quick/quick_method_frame_info.h"
 #include "read_barrier_option.h"
 #include "stack.h"
-#include "stack_map.h"
 #include "utils.h"
 
 namespace art {
@@ -164,16 +163,6 @@
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
 
-  bool IsOptimized(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Temporary solution for detecting if a method has been optimized: the compiler
-    // does not create a GC map. Instead, the vmap table contains the stack map
-    // (as in stack_map.h).
-    return !IsNative()
-        && GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
-        && GetQuickOatCodePointer(pointer_size) != nullptr
-        && GetNativeGcMap(pointer_size) == nullptr;
-  }
-
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_REQUIRES(Locks::mutator_lock_);
 
   uint16_t GetMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -280,94 +269,6 @@
                      entry_point_from_quick_compiled_code, pointer_size);
   }
 
-  uint32_t GetCodeSize() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Check whether the given PC is within the quick compiled code associated with this method's
-  // quick entrypoint. This code isn't robust for instrumentation, etc. and is only used for
-  // debug purposes.
-  bool PcIsWithinQuickCode(uintptr_t pc) {
-    return PcIsWithinQuickCode(
-        reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode()), pc);
-  }
-
-  void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns true if the entrypoint points to the interpreter, as
-  // opposed to the compiled code, that is, this method will be
-  // interpretered on invocation.
-  bool IsEntrypointInterpreter() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  uint32_t GetQuickOatCodeOffset();
-  void SetQuickOatCodeOffset(uint32_t code_offset);
-
-  ALWAYS_INLINE static const void* EntryPointToCodePointer(const void* entry_point) {
-    uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
-    // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
-    //       least 2 byte aligned.
-    code &= ~0x1;
-    return reinterpret_cast<const void*>(code);
-  }
-
-  // Actual entry point pointer to compiled oat code or null.
-  const void* GetQuickOatEntryPoint(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  // Actual pointer to compiled oat code or null.
-  const void* GetQuickOatCodePointer(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    return EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
-  }
-
-  // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
-  const uint8_t* GetMappingTable(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetMappingTable(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
-  const uint8_t* GetVmapTable(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetVmapTable(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  CodeInfo GetOptimizedCodeInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
-  const uint8_t* GetNativeGcMap(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetNativeGcMap(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  template <bool kCheckFrameSize = true>
-  uint32_t GetFrameSizeInBytes() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t result = GetQuickFrameInfo().FrameSizeInBytes();
-    if (kCheckFrameSize) {
-      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
-    }
-    return result;
-  }
-
-  QuickMethodFrameInfo GetQuickFrameInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-  QuickMethodFrameInfo GetQuickFrameInfo(const void* code_pointer)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  FrameOffset GetReturnPcOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetReturnPcOffset(GetFrameSizeInBytes());
-  }
-
-  FrameOffset GetReturnPcOffset(uint32_t frame_size_in_bytes)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK_EQ(frame_size_in_bytes, GetFrameSizeInBytes());
-    return FrameOffset(frame_size_in_bytes - sizeof(void*));
-  }
-
-  FrameOffset GetHandleScopeOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
-    constexpr size_t handle_scope_offset = sizeof(ArtMethod*);
-    DCHECK_LT(handle_scope_offset, GetFrameSizeInBytes());
-    return FrameOffset(handle_scope_offset);
-  }
-
   void RegisterNative(const void* native_method, bool is_fast)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -429,27 +330,6 @@
 
   bool IsImtUnimplementedMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifdef NDEBUG
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
-  }
-#else
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-#endif
-
-  // Converts a native PC to a dex PC.
-  uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Converts a dex PC to a native PC.
-  uintptr_t ToNativeQuickPc(const uint32_t dex_pc,
-                            bool is_for_catch_handler,
-                            bool abort_on_failure = true)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   MethodReference ToMethodReference() SHARED_REQUIRES(Locks::mutator_lock_) {
     return MethodReference(GetDexFile(), GetDexMethodIndex());
   }
@@ -542,6 +422,8 @@
     return ++hotness_count_;
   }
 
+  const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
@@ -622,24 +504,6 @@
     }
   }
 
-  // Code points to the start of the quick code.
-  static uint32_t GetCodeSize(const void* code);
-
-  static bool PcIsWithinQuickCode(uintptr_t code, uintptr_t pc) {
-    if (code == 0) {
-      return pc == 0;
-    }
-    /*
-     * During a stack walk, a return PC may point past-the-end of the code
-     * in the case that the last instruction is a call that isn't expected to
-     * return.  Thus, we check <= code + GetCodeSize().
-     *
-     * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
-     */
-    return code <= pc && pc <= code + GetCodeSize(
-        EntryPointToCodePointer(reinterpret_cast<const void*>(code)));
-  }
-
   DISALLOW_COPY_AND_ASSIGN(ArtMethod);  // Need to use CopyFrom to deal with 32 vs 64 bits.
 };
 
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 7965cd7..e897351 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
 #define ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
 
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "gc_map.h"
 #include "scoped_thread_state_change.h"
@@ -53,7 +54,7 @@
 
   void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (GetMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentCode().IsOptimized(sizeof(void*))) {
       CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
     } else {
       CheckQuickMethod(registers, number_of_references, native_pc_offset);
@@ -64,7 +65,7 @@
   void CheckOptimizedMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
-    CodeInfo code_info = m->GetOptimizedCodeInfo();
+    CodeInfo code_info = GetCurrentCode().GetOptimizedCodeInfo();
     StackMapEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     uint16_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
@@ -108,7 +109,7 @@
   void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
-    NativePcOffsetToReferenceMap map(m->GetNativeGcMap(sizeof(void*)));
+    NativePcOffsetToReferenceMap map(GetCurrentCode().GetNativeGcMap(sizeof(void*)));
     const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
     CHECK(ref_bitmap);
     for (int i = 0; i < number_of_references; ++i) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 6fa8fc1..9349fe3 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1023,9 +1023,9 @@
   class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   VisitClassRoots(visitor, flags);
   array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  for (GcRoot<mirror::Class>& root : find_array_class_cache_) {
-    root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  }
+  // Instead of visiting the find_array_class_cache_ drop it so that it doesn't prevent class
+  // unloading if we are marking roots.
+  DropFindArrayClassCache();
 }
 
 class VisitClassLoaderClassesVisitor : public ClassLoaderVisitor {
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index f66628d..21e4e44 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -34,6 +34,7 @@
 #include "mirror/throwable.h"
 #include "nth_caller_visitor.h"
 #include "runtime.h"
+#include "stack_map.h"
 #include "thread.h"
 
 namespace art {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index f193999..17e6aac 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -16,6 +16,7 @@
 
 #include "entrypoints/entrypoint_utils.h"
 
+#include "art_code.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/mutex.h"
@@ -358,16 +359,17 @@
   const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
   auto** caller_sp = reinterpret_cast<ArtMethod**>(
       reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
+  ArtCode current_code = GetCallingCodeFrom(caller_sp);
   ArtMethod* outer_method = *caller_sp;
   ArtMethod* caller = outer_method;
 
-  if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) {
+  if ((outer_method != nullptr) && current_code.IsOptimized(sizeof(void*))) {
     const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type);
     uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
         (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
     if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
-      uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc);
-      CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+      uintptr_t native_pc_offset = current_code.NativeQuickPcOffset(caller_pc);
+      CodeInfo code_info = current_code.GetOptimizedCodeInfo();
       StackMapEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 4217cab..171ace2 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -20,6 +20,7 @@
 #include <jni.h>
 #include <stdint.h>
 
+#include "art_code.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "dex_instruction.h"
@@ -184,6 +185,10 @@
                                      Runtime::CalleeSaveType type,
                                      bool do_caller_check = false);
 
+inline ArtCode GetCallingCodeFrom(ArtMethod** sp) {
+  return ArtCode(sp);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 3bf001e..4adb39b 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -21,8 +21,9 @@
 namespace art {
 
 extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
   ScopedQuickEntrypointChecks sqec(self);
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerException("Null reference used for synchronization (monitor-enter)");
@@ -41,8 +42,9 @@
 }
 
 extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
   ScopedQuickEntrypointChecks sqec(self);
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerException("Null reference used for synchronization (monitor-exit)");
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5d3ac73..377675e 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "callee_save_frame.h"
 #include "common_throws.h"
@@ -294,7 +295,8 @@
   static mirror::Object* GetProxyThisObject(ArtMethod** sp)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     CHECK((*sp)->IsProxyMethod());
-    CHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize, (*sp)->GetFrameSizeInBytes());
+    CHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize,
+             GetCallingCodeFrom(sp).GetFrameSizeInBytes());
     CHECK_GT(kNumQuickGprArgs, 0u);
     constexpr uint32_t kThisGprIndex = 0u;  // 'this' is in the 1st GPR.
     size_t this_arg_offset = kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset +
@@ -320,12 +322,11 @@
     const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
-    ArtMethod* outer_method = *caller_sp;
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
-    uintptr_t outer_pc_offset = outer_method->NativeQuickPcOffset(outer_pc);
+    uintptr_t outer_pc_offset = GetCallingCodeFrom(caller_sp).NativeQuickPcOffset(outer_pc);
 
-    if (outer_method->IsOptimized(sizeof(void*))) {
-      CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+    if (GetCallingCodeFrom(caller_sp).IsOptimized(sizeof(void*))) {
+      CodeInfo code_info = GetCallingCodeFrom(caller_sp).GetOptimizedCodeInfo();
       StackMapEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
@@ -336,7 +337,7 @@
         return stack_map.GetDexPc(encoding);
       }
     } else {
-      return outer_method->ToDexPc(outer_pc);
+      return GetCallingCodeFrom(caller_sp).ToDexPc(outer_pc);
     }
   }
 
@@ -719,7 +720,7 @@
     uint16_t num_regs = code_item->registers_size_;
     // No last shadow coming from quick.
     ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-        CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0);
+        CREATE_SHADOW_FRAME(num_regs, /* link */ nullptr, method, /* dex pc */ 0);
     ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
@@ -841,8 +842,9 @@
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
   DCHECK_EQ((*sp), proxy_method) << PrettyMethod(proxy_method);
-  DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
-            Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
+  DCHECK_EQ(GetCallingCodeFrom(sp).GetFrameSizeInBytes(),
+            ArtCode(Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs))
+                .GetFrameSizeInBytes())
       << PrettyMethod(proxy_method);
   self->VerifyStack();
   // Start new JNI local reference state.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 0b36694..5299394 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -49,7 +49,7 @@
   static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    QuickMethodFrameInfo frame_info = ArtCode(save_method).GetQuickFrameInfo();
     EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
         << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
         << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
@@ -58,8 +58,8 @@
   static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
-    EXPECT_EQ(save_method->GetReturnPcOffset().SizeValue(), pc_offset)
+    QuickMethodFrameInfo frame_info = ArtCode(save_method).GetQuickFrameInfo();
+    EXPECT_EQ(ArtCode(save_method).GetReturnPcOffset().SizeValue(), pc_offset)
         << "Expected and real pc offset differs for " << type
         << " core spills=" << std::hex << frame_info.CoreSpillMask()
         << " fp spills=" << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 9f84bd2..da1d80e 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -169,7 +169,7 @@
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
   r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
-  QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+  QuickMethodFrameInfo frame_info = ArtCode(save_method).GetQuickFrameInfo();
 
   ASSERT_EQ(kStackAlignment, 16U);
   // ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
@@ -187,14 +187,14 @@
   }
 
   fake_stack.push_back(
-      method_g_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
+      ArtCode(method_g_).ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method g
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
   fake_stack.push_back(0);
   fake_stack.push_back(0);
   fake_stack.push_back(
-      method_g_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
+      ArtCode(method_g_).ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method f
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index c3a9627..30a0983 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -20,6 +20,7 @@
 #include <sys/mman.h>
 #include <sys/ucontext.h>
 
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "mirror/class.h"
@@ -359,16 +360,17 @@
     return false;
   }
 
+  ArtCode art_code(method_obj);
+
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
     VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
-    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj,
-                                                                                 sizeof(void*));
-    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+    uint32_t sought_offset = return_pc -
+        reinterpret_cast<uintptr_t>(art_code.GetQuickOatEntryPoint(sizeof(void*)));
     VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
-  uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
+  uint32_t dexpc = art_code.ToDexPc(return_pc, false);
   VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index deada4c..2dd2b7d 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "atomic.h"
 #include "class_linker.h"
@@ -251,7 +252,7 @@
         instrumentation_stack_->insert(it, instrumentation_frame);
         SetReturnPc(instrumentation_exit_pc_);
       }
-      dex_pcs_.push_back(m->ToDexPc(last_return_pc_));
+      dex_pcs_.push_back(GetCurrentCode().ToDexPc(last_return_pc_));
       last_return_pc_ = return_pc;
       ++instrumentation_stack_depth_;
       return true;  // Continue.
@@ -960,6 +961,15 @@
   }
 }
 
+// Computes a frame ID by ignoring inlined frames.
+size_t Instrumentation::ComputeFrameId(Thread* self,
+                                       size_t frame_depth,
+                                       size_t inlined_frames_before_frame) {
+  CHECK_GE(frame_depth, inlined_frames_before_frame);
+  size_t no_inline_depth = frame_depth - inlined_frames_before_frame;
+  return StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk) - no_inline_depth;
+}
+
 static void CheckStackDepth(Thread* self, const InstrumentationStackFrame& instrumentation_frame,
                             int delta)
     SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 612ca14..8dd2357 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -397,6 +397,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::thread_list_lock_);
 
+  static size_t ComputeFrameId(Thread* self,
+                               size_t frame_depth,
+                               size_t inlined_frames_before_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   InstrumentationLevel GetCurrentInstrumentationLevel() const;
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 2a76f94..b010504 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -333,7 +333,7 @@
   // Set up shadow frame with matching number of reference slots to vregs.
   ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame();
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0);
+      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, /* dex pc */ 0);
   ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
   self->PushShadowFrame(shadow_frame);
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 6f5b843..44eb29e 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -619,7 +619,7 @@
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0);
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
   ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
 
   // Initialize new shadow frame by copying the registers from the callee shadow frame.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index f57bddb..8c495fc 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -45,6 +45,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "stack.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -79,12 +80,28 @@
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorEnter(self);
+template <bool kMonitorCounting>
+static inline void DoMonitorEnter(Thread* self,
+                                  ShadowFrame* frame,
+                                  Object* ref)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  StackHandleScope<1> hs(self);
+  Handle<Object> h_ref(hs.NewHandle(ref));
+  h_ref->MonitorEnter(self);
+  frame->GetLockCountData().AddMonitor<kMonitorCounting>(self, h_ref.Get());
 }
 
-static inline void DoMonitorExit(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorExit(self);
+template <bool kMonitorCounting>
+static inline void DoMonitorExit(Thread* self,
+                                 ShadowFrame* frame,
+                                 Object* ref)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  StackHandleScope<1> hs(self);
+  Handle<Object> h_ref(hs.NewHandle(ref));
+  h_ref->MonitorExit(self);
+  frame->GetLockCountData().RemoveMonitorOrThrow<kMonitorCounting>(self, h_ref.Get());
 }
 
 void AbortTransactionF(Thread* self, const char* fmt, ...)
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9677d79..4265b50 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -89,6 +89,11 @@
       UnexpectedOpcode(inst, shadow_frame);                                                       \
   } HANDLE_INSTRUCTION_END();
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
 
 /**
  * Interpreter based on computed goto tables.
@@ -275,6 +280,7 @@
   HANDLE_INSTRUCTION_START(RETURN_VOID_NO_BARRIER) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -289,6 +295,7 @@
     QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -304,6 +311,7 @@
     result.SetJ(0);
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -318,6 +326,7 @@
     JValue result;
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -331,6 +340,7 @@
   HANDLE_INSTRUCTION_START(RETURN_OBJECT) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     const uint8_t vreg_index = inst->VRegA_11x(inst_data);
     Object* obj_result = shadow_frame.GetVRegReference(vreg_index);
     if (do_assignability_check && obj_result != nullptr) {
@@ -468,7 +478,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorEnter(self, obj);
+      DoMonitorEnter<do_access_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -480,7 +490,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorExit(self, obj);
+      DoMonitorExit<do_access_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -2544,6 +2554,8 @@
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
+      // Structured locking is to be enforced for abnormal termination, too.
+      shadow_frame.GetLockCountData().CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);
       return JValue(); /* Handled in caller. */
     } else {
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 083dfb5..76d4bb0fc 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -31,6 +31,9 @@
                                                                   inst->GetDexPc(insns),        \
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
+      /* Structured locking is to be enforced for abnormal termination, too. */                 \
+      shadow_frame.GetLockCountData().                                                          \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -47,6 +50,12 @@
     }                                                                             \
   } while (false)
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
+
 // Code to run before each dex instruction.
 #define PREAMBLE()                                                                              \
   do {                                                                                          \
@@ -182,6 +191,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -194,6 +204,7 @@
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -207,6 +218,7 @@
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -219,6 +231,7 @@
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -230,6 +243,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
         Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
@@ -366,7 +380,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorEnter(self, obj);
+          DoMonitorEnter<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
@@ -378,7 +392,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorExit(self, obj);
+          DoMonitorExit<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 50490bb..f75b8ae 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -137,9 +137,13 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
-  mirror::Object* MonitorEnter(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
-      EXCLUSIVE_LOCK_FUNCTION();
-  bool MonitorExit(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
+  mirror::Object* MonitorEnter(Thread* self)
+      EXCLUSIVE_LOCK_FUNCTION()
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool MonitorExit(Thread* self)
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       UNLOCK_FUNCTION();
   void Notify(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   void NotifyAll(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index fa58418..255a0f2 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -696,6 +696,7 @@
 mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
+  self->AssertThreadSuspensionIsAllowable();
   obj = FakeLock(obj);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
@@ -771,6 +772,7 @@
 bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
+  self->AssertThreadSuspensionIsAllowable();
   obj = FakeUnlock(obj);
   StackHandleScope<1> hs(self);
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 8cd93c6..61235ef 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -65,12 +65,16 @@
   // NO_THREAD_SAFETY_ANALYSIS for mon->Lock.
   static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj)
       EXCLUSIVE_LOCK_FUNCTION(obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS;
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS for mon->Unlock.
   static bool MonitorExit(Thread* thread, mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!Roles::uninterruptible_)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      UNLOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS;
+      UNLOCK_FUNCTION(obj);
 
   static void Notify(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
     DoNotify(self, obj, false);
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5da15df..3a73900 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -522,6 +522,10 @@
   }
   if (classes == nullptr) {
     // Return an empty array instead of a null pointer.
+    if (soa.Self()->IsExceptionPending()) {
+      // Pending exception from GetDeclaredClasses.
+      return nullptr;
+    }
     mirror::Class* class_class = mirror::Class::GetJavaLangClass();
     mirror::Class* class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 5df6525..f7913e1 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -22,7 +22,7 @@
 namespace art {
 
 inline const OatQuickMethodHeader* OatFile::OatMethod::GetOatQuickMethodHeader() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -47,7 +47,7 @@
 }
 
 inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -55,7 +55,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -63,7 +63,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -71,7 +71,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetGcMap() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -122,7 +122,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetMappingTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -134,7 +134,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -146,7 +146,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return 0u;
   }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 27f8677..364b734 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -29,6 +29,7 @@
 #include "mirror/class.h"
 #include "oat.h"
 #include "os.h"
+#include "utils.h"
 
 namespace art {
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 63f43cf..7ba19ab 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -17,6 +17,7 @@
 #include "quick_exception_handler.h"
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
@@ -26,6 +27,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
+#include "stack_map.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -99,7 +101,7 @@
         exception_handler_->SetHandlerMethod(method);
         exception_handler_->SetHandlerDexPc(found_dex_pc);
         exception_handler_->SetHandlerQuickFramePc(
-            method->ToNativeQuickPc(found_dex_pc, /* is_catch_handler */ true));
+            GetCurrentCode().ToNativeQuickPc(found_dex_pc, /* is_catch_handler */ true));
         exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
         return false;  // End stack walk.
       } else if (UNLIKELY(GetThread()->HasDebuggerShadowFrames())) {
@@ -159,7 +161,7 @@
   // If the handler is in optimized code, we need to set the catch environment.
   if (*handler_quick_frame_ != nullptr &&
       handler_method_ != nullptr &&
-      handler_method_->IsOptimized(sizeof(void*))) {
+      ArtCode(handler_quick_frame_).IsOptimized(sizeof(void*))) {
     SetCatchEnvironmentForOptimizedHandler(&visitor);
   }
 }
@@ -200,14 +202,14 @@
 void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor) {
   DCHECK(!is_deoptimization_);
   DCHECK(*handler_quick_frame_ != nullptr) << "Method should not be called on upcall exceptions";
-  DCHECK(handler_method_ != nullptr && handler_method_->IsOptimized(sizeof(void*)));
+  DCHECK(handler_method_ != nullptr && ArtCode(handler_quick_frame_).IsOptimized(sizeof(void*)));
 
   if (kDebugExceptionDelivery) {
     self_->DumpStack(LOG(INFO) << "Setting catch phis: ");
   }
 
   const size_t number_of_vregs = handler_method_->GetCodeItem()->registers_size_;
-  CodeInfo code_info = handler_method_->GetOptimizedCodeInfo();
+  CodeInfo code_info = ArtCode(handler_quick_frame_).GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
   // Find stack map of the throwing instruction.
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index f0b3c4e..44a13c9 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -41,7 +41,7 @@
  public:
   explicit Backtrace(void* raw_context) : raw_context_(raw_context) {}
   void Dump(std::ostream& os) const {
-    DumpNativeStack(os, GetTid(), "\t", nullptr, raw_context_);
+    DumpNativeStack(os, GetTid(), "\t", nullptr, nullptr, raw_context_);
   }
  private:
   // Stores the context of the signal that was unexpected and will terminate the runtime. The
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 1d21a64..d8d916c 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -17,6 +17,7 @@
 #include "stack.h"
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "base/hex_dump.h"
 #include "entrypoints/entrypoint_utils-inl.h"
@@ -110,9 +111,9 @@
 }
 
 InlineInfo StackVisitor::GetCurrentInlineInfo() const {
-  ArtMethod* outer_method = GetOuterMethod();
-  uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
-  CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+  ArtCode outer_code = GetCurrentCode();
+  uint32_t native_pc_offset = outer_code.NativeQuickPcOffset(cur_quick_frame_pc_);
+  CodeInfo code_info = outer_code.GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
@@ -142,7 +143,7 @@
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
       return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
     } else {
-      return GetMethod()->ToDexPc(cur_quick_frame_pc_, abort_on_failure);
+      return GetCurrentCode().ToDexPc(cur_quick_frame_pc_, abort_on_failure);
     }
   } else {
     return 0;
@@ -160,7 +161,8 @@
   } else if (m->IsNative()) {
     if (cur_quick_frame_ != nullptr) {
       HandleScope* hs = reinterpret_cast<HandleScope*>(
-          reinterpret_cast<char*>(cur_quick_frame_) + m->GetHandleScopeOffset().SizeValue());
+          reinterpret_cast<char*>(cur_quick_frame_) +
+            GetCurrentCode().GetHandleScopeOffset().SizeValue());
       return hs->GetReference(0);
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
@@ -190,7 +192,7 @@
 
 size_t StackVisitor::GetNativePcOffset() const {
   DCHECK(!IsShadowFrame());
-  return GetMethod()->NativeQuickPcOffset(cur_quick_frame_pc_);
+  return GetCurrentCode().NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
 bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) {
@@ -199,10 +201,10 @@
   if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
     return false;
   }
-  if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+  if (GetCurrentCode().IsOptimized(sizeof(void*))) {
     return true;  // TODO: Implement.
   }
-  const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+  const uint8_t* native_gc_map = GetCurrentCode().GetNativeGcMap(sizeof(void*));
   CHECK(native_gc_map != nullptr) << PrettyMethod(m);
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   // Can't be null or how would we compile its instructions?
@@ -211,9 +213,7 @@
   size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
   const uint8_t* reg_bitmap = nullptr;
   if (num_regs > 0) {
-    Runtime* runtime = Runtime::Current();
-    const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-    uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+    uintptr_t native_pc_offset = GetCurrentCode().NativeQuickPcOffset(GetCurrentQuickFramePc());
     reg_bitmap = map.FindBitMap(native_pc_offset);
     DCHECK(reg_bitmap != nullptr);
   }
@@ -252,7 +252,7 @@
     if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) {
       return true;
     }
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentCode().IsOptimized(sizeof(void*))) {
       return GetVRegFromOptimizedCode(m, vreg, kind, val);
     } else {
       return GetVRegFromQuickCode(m, vreg, kind, val);
@@ -266,10 +266,9 @@
 
 bool StackVisitor::GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                         uint32_t* val) const {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const VmapTable vmap_table(GetCurrentCode().GetVmapTable(sizeof(void*)));
+  QuickMethodFrameInfo frame_info = GetCurrentCode().GetQuickFrameInfo();
   uint32_t vmap_offset;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
@@ -289,19 +288,16 @@
 
 bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                             uint32_t* val) const {
-  ArtMethod* outer_method = GetOuterMethod();
-  const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
   DCHECK_EQ(m, GetMethod());
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
                                                     // its instructions?
   uint16_t number_of_dex_registers = code_item->registers_size_;
   DCHECK_LT(vreg, code_item->registers_size_);
-  CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+  CodeInfo code_info = GetCurrentCode().GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
-  uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
+  uint32_t native_pc_offset = GetCurrentCode().NativeQuickPcOffset(cur_quick_frame_pc_);
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
   size_t depth_in_stack_map = current_inlining_depth_ - 1;
@@ -406,7 +402,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentCode().IsOptimized(sizeof(void*))) {
       return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
     } else {
       return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val);
@@ -420,10 +416,9 @@
 
 bool StackVisitor::GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
                                             VRegKind kind_hi, uint64_t* val) const {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const VmapTable vmap_table(GetCurrentCode().GetVmapTable(sizeof(void*)));
+  QuickMethodFrameInfo frame_info = GetCurrentCode().GetQuickFrameInfo();
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
@@ -482,7 +477,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentCode().IsOptimized(sizeof(void*))) {
       return false;
     } else {
       return SetVRegFromQuickCode(m, vreg, new_value, kind);
@@ -497,10 +492,8 @@
                                         VRegKind kind) {
   DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
   DCHECK(m == GetMethod());
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  const VmapTable vmap_table(GetCurrentCode().GetVmapTable(sizeof(void*)));
+  QuickMethodFrameInfo frame_info = GetCurrentCode().GetQuickFrameInfo();
   uint32_t vmap_offset;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
@@ -591,7 +584,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentCode().IsOptimized(sizeof(void*))) {
       return false;
     } else {
       return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
@@ -605,10 +598,9 @@
 
 bool StackVisitor::SetVRegPairFromQuickCode(
     ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const VmapTable vmap_table(GetCurrentCode().GetVmapTable(sizeof(void*)));
+  QuickMethodFrameInfo frame_info = GetCurrentCode().GetQuickFrameInfo();
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
@@ -725,14 +717,14 @@
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetCurrentCode().GetReturnPcOffset().SizeValue();
   return *reinterpret_cast<uintptr_t*>(pc_addr);
 }
 
 void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   CHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetCurrentCode().GetReturnPcOffset().SizeValue();
   *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc;
 }
 
@@ -867,9 +859,9 @@
       }
     }
     if (cur_quick_frame_ != nullptr) {
-      method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
+      GetCurrentCode().AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
       // Frame sanity.
-      size_t frame_size = method->GetFrameSizeInBytes();
+      size_t frame_size = GetCurrentCode().GetFrameSizeInBytes();
       CHECK_NE(frame_size, 0u);
       // A rough guess at an upper size we expect to see for a frame.
       // 256 registers
@@ -880,7 +872,7 @@
       // const size_t kMaxExpectedFrameSize = (256 + 2 + 3 + 3) * sizeof(word);
       const size_t kMaxExpectedFrameSize = 2 * KB;
       CHECK_LE(frame_size, kMaxExpectedFrameSize);
-      size_t return_pc_offset = method->GetReturnPcOffset().SizeValue();
+      size_t return_pc_offset = GetCurrentCode().GetReturnPcOffset().SizeValue();
       CHECK_LT(return_pc_offset, frame_size);
     }
   }
@@ -891,6 +883,7 @@
   CHECK_EQ(cur_depth_, 0U);
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
+  size_t inlined_frames_count = 0;
 
   for (const ManagedStack* current_fragment = thread_->GetManagedStack();
        current_fragment != nullptr; current_fragment = current_fragment->GetLink()) {
@@ -906,10 +899,10 @@
         SanityCheckFrame();
 
         if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
-            && method->IsOptimized(sizeof(void*))) {
-          CodeInfo code_info = method->GetOptimizedCodeInfo();
+            && GetCurrentCode().IsOptimized(sizeof(void*))) {
+          CodeInfo code_info = GetCurrentCode().GetOptimizedCodeInfo();
           StackMapEncoding encoding = code_info.ExtractEncoding();
-          uint32_t native_pc_offset = method->NativeQuickPcOffset(cur_quick_frame_pc_);
+          uint32_t native_pc_offset = GetCurrentCode().NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
           if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
@@ -922,6 +915,7 @@
                 return;
               }
               cur_depth_++;
+              inlined_frames_count++;
             }
           }
         }
@@ -934,9 +928,9 @@
         if (context_ != nullptr) {
           context_->FillCalleeSaves(*this);
         }
-        size_t frame_size = method->GetFrameSizeInBytes();
+        size_t frame_size = GetCurrentCode().GetFrameSizeInBytes();
         // Compute PC for next stack frame from return PC.
-        size_t return_pc_offset = method->GetReturnPcOffset(frame_size).SizeValue();
+        size_t return_pc_offset = GetCurrentCode().GetReturnPcOffset().SizeValue();
         uint8_t* return_pc_addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
         if (UNLIKELY(exit_stubs_installed)) {
@@ -952,27 +946,32 @@
               ArtMethod* callee = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
                                             << PrettyMethod(GetMethod());
-            } else if (instrumentation_frame.method_ != GetMethod()) {
-              LOG(FATAL)  << "Expected: " << PrettyMethod(instrumentation_frame.method_)
-                          << " Found: " << PrettyMethod(GetMethod());
+            } else {
+              CHECK_EQ(instrumentation_frame.method_, GetMethod())
+                  << "Expected: " << PrettyMethod(instrumentation_frame.method_)
+                  << " Found: " << PrettyMethod(GetMethod());
             }
             if (num_frames_ != 0) {
               // Check agreement of frame Ids only if num_frames_ is computed to avoid infinite
               // recursion.
-              CHECK(instrumentation_frame.frame_id_ == GetFrameId())
-                    << "Expected: " << instrumentation_frame.frame_id_
-                    << " Found: " << GetFrameId();
+              size_t frame_id = instrumentation::Instrumentation::ComputeFrameId(
+                  thread_,
+                  cur_depth_,
+                  inlined_frames_count);
+              CHECK_EQ(instrumentation_frame.frame_id_, frame_id);
             }
             return_pc = instrumentation_frame.return_pc_;
           }
         }
+        ArtCode code = GetCurrentCode();
+
         cur_quick_frame_pc_ = return_pc;
         uint8_t* next_frame = reinterpret_cast<uint8_t*>(cur_quick_frame_) + frame_size;
         cur_quick_frame_ = reinterpret_cast<ArtMethod**>(next_frame);
 
         if (kDebugStackWalk) {
           LOG(INFO) << PrettyMethod(method) << "@" << method << " size=" << frame_size
-              << " optimized=" << method->IsOptimized(sizeof(void*))
+              << " optimized=" << code.IsOptimized(sizeof(void*))
               << " native=" << method->IsNative()
               << " entrypoints=" << method->GetEntryPointFromQuickCompiledCode()
               << "," << method->GetEntryPointFromJni()
@@ -1051,4 +1050,87 @@
   }
 }
 
+void LockCountData::AddMonitorInternal(Thread* self, mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+
+  // If there's an error during enter, we won't have locked the monitor. So check there's no
+  // exception.
+  if (self->IsExceptionPending()) {
+    return;
+  }
+
+  if (monitors_ == nullptr) {
+    monitors_.reset(new std::vector<mirror::Object*>());
+  }
+  monitors_->push_back(obj);
+}
+
+void LockCountData::RemoveMonitorInternal(Thread* self, const mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+  bool found_object = false;
+  if (monitors_ != nullptr) {
+    // We need to remove one pointer to ref, as duplicates are used for counting recursive locks.
+    // We arbitrarily choose the first one.
+    auto it = std::find(monitors_->begin(), monitors_->end(), obj);
+    if (it != monitors_->end()) {
+      monitors_->erase(it);
+      found_object = true;
+    }
+  }
+  if (!found_object) {
+    // The object wasn't found. Time for an IllegalMonitorStateException.
+    // The order here isn't fully clear. Assume that any other pending exception is swallowed.
+    // TODO: Maybe make already pending exception a suppressed exception.
+    self->ClearException();
+    self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                             "did not lock monitor on object of type '%s' before unlocking",
+                             PrettyTypeOf(const_cast<mirror::Object*>(obj)).c_str());
+  }
+}
+
+// Helper to unlock a monitor. Must be NO_THREAD_SAFETY_ANALYSIS, as we can't statically show
+// that the object was locked.
+void MonitorExitHelper(Thread* self, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(self != nullptr);
+  DCHECK(obj != nullptr);
+  obj->MonitorExit(self);
+}
+
+bool LockCountData::CheckAllMonitorsReleasedInternal(Thread* self) {
+  DCHECK(self != nullptr);
+  if (monitors_ != nullptr) {
+    if (!monitors_->empty()) {
+      // There may be an exception pending, if the method is terminating abruptly. Clear it.
+      // TODO: Should we add this as a suppressed exception?
+      self->ClearException();
+
+      // OK, there are monitors that are still locked. To enforce structured locking (and avoid
+      // deadlocks) we unlock all of them before we raise the IllegalMonitorState exception.
+      for (mirror::Object* obj : *monitors_) {
+        MonitorExitHelper(self, obj);
+        // If this raised an exception, ignore. TODO: Should we add this as suppressed
+        // exceptions?
+        if (self->IsExceptionPending()) {
+          self->ClearException();
+        }
+      }
+      // Raise an exception, just give the first object as the sample.
+      mirror::Object* first = (*monitors_)[0];
+      self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                               "did not unlock monitor on object of type '%s'",
+                               PrettyTypeOf(first).c_str());
+
+      // To make sure this path is not triggered again, clean out the monitors.
+      monitors_->clear();
+
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace art
diff --git a/runtime/stack.h b/runtime/stack.h
index 31acf0e..3e0566d 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -20,7 +20,10 @@
 #include <stdint.h>
 #include <string>
 
+#include "art_code.h"
 #include "arch/instruction_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/object_reference.h"
@@ -66,6 +69,72 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
+// The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
+// thread roots).
+// Note: implementation is split so that the call sites may be optimized to no-ops in case no
+//       lock counting is necessary. The actual implementation is in the cc file to avoid
+//       dependencies.
+class LockCountData {
+ public:
+  // Add the given object to the list of monitors, that is, objects that have been locked. This
+  // will not throw (but be skipped if there is an exception pending on entry).
+  template <bool kLockCounting>
+  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return;
+    }
+    AddMonitorInternal(self, obj);
+  }
+
+  // Try to remove the given object from the monitor list, indicating an unlock operation.
+  // This will throw an IllegalMonitorStateException (clearing any already pending exception), in
+  // case that there wasn't a lock recorded for the object.
+  template <bool kLockCounting>
+  void RemoveMonitorOrThrow(Thread* self,
+                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return;
+    }
+    RemoveMonitorInternal(self, obj);
+  }
+
+  // Check whether all acquired monitors have been released. This will potentially throw an
+  // IllegalMonitorStateException, clearing any already pending exception. Returns true if the
+  // check shows that everything is OK wrt/ lock counting, false otherwise.
+  template <bool kLockCounting>
+  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return true;
+    }
+    return CheckAllMonitorsReleasedInternal(self);
+  }
+
+  template <typename T, typename... Args>
+  void VisitMonitors(T visitor, Args&&... args) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (monitors_ != nullptr) {
+      // Visitors may change the Object*. Be careful with the foreach loop.
+      for (mirror::Object*& obj : *monitors_) {
+        visitor(/* inout */ &obj, std::forward<Args>(args)...);
+      }
+    }
+  }
+
+ private:
+  // Internal implementations.
+  void AddMonitorInternal(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  void RemoveMonitorInternal(Thread* self, const mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool CheckAllMonitorsReleasedInternal(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Stores references to the locked-on objects. As noted, this should be visited during thread
+  // marking.
+  std::unique_ptr<std::vector<mirror::Object*>> monitors_;
+};
+
 // ShadowFrame has 2 possible layouts:
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
@@ -272,6 +341,10 @@
     }
   }
 
+  LockCountData& GetLockCountData() {
+    return lock_count_data_;
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -330,6 +403,7 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   uint32_t dex_pc_;
+  LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
@@ -644,6 +718,10 @@
     return cur_shadow_frame_;
   }
 
+  bool IsCurrentFrameInInterpreter() const {
+    return cur_shadow_frame_ != nullptr;
+  }
+
   HandleScope* GetCurrentHandleScope(size_t pointer_size) const {
     ArtMethod** sp = GetCurrentQuickFrame();
     // Skip ArtMethod*; handle scope comes next;
@@ -657,6 +735,8 @@
 
   static void DescribeStack(Thread* thread) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ArtCode GetCurrentCode() const { return ArtCode(cur_quick_frame_); }
+
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
   StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind, size_t num_frames)
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c317591..8e0c288 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -32,6 +32,7 @@
 #include <sstream>
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/bit_utils.h"
@@ -66,6 +67,7 @@
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "stack.h"
+#include "stack_map.h"
 #include "thread_list.h"
 #include "thread-inl.h"
 #include "utils.h"
@@ -1493,7 +1495,9 @@
     // If we're currently in native code, dump that stack before dumping the managed stack.
     if (dump_for_abort || ShouldShowNativeStack(this)) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      DumpNativeStack(os, GetTid(), "  native: ", GetCurrentMethod(nullptr, !dump_for_abort));
+      ArtMethod* method = GetCurrentMethod(nullptr, !dump_for_abort);
+      ArtCode art_code(method);
+      DumpNativeStack(os, GetTid(), "  native: ", method, &art_code);
     }
     DumpJavaStack(os);
   } else {
@@ -2651,7 +2655,7 @@
     } else {
       // Java method.
       // Portable path use DexGcMap and store in Method.native_gc_map_.
-      const uint8_t* gc_map = m->GetNativeGcMap(sizeof(void*));
+      const uint8_t* gc_map = GetCurrentCode().GetNativeGcMap(sizeof(void*));
       CHECK(gc_map != nullptr) << PrettyMethod(m);
       verifier::DexPcToReferenceMap dex_gc_map(gc_map);
       uint32_t dex_pc = shadow_frame->GetDexPC();
@@ -2671,6 +2675,8 @@
         }
       }
     }
+    // Mark lock count map required for structured locking checks.
+    shadow_frame->GetLockCountData().VisitMonitors(visitor_, -1, this);
   }
 
  private:
@@ -2696,13 +2702,11 @@
 
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
-      if (m->IsOptimized(sizeof(void*))) {
+      if (GetCurrentCode().IsOptimized(sizeof(void*))) {
         auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
             reinterpret_cast<uintptr_t>(cur_quick_frame));
-        Runtime* runtime = Runtime::Current();
-        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-        uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
-        CodeInfo code_info = m->GetOptimizedCodeInfo();
+        uintptr_t native_pc_offset = GetCurrentCode().NativeQuickPcOffset(GetCurrentQuickFramePc());
+        CodeInfo code_info = GetCurrentCode().GetOptimizedCodeInfo();
         StackMapEncoding encoding = code_info.ExtractEncoding();
         StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
         DCHECK(map.IsValid());
@@ -2732,7 +2736,7 @@
           }
         }
       } else {
-        const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+        const uint8_t* native_gc_map = GetCurrentCode().GetNativeGcMap(sizeof(void*));
         CHECK(native_gc_map != nullptr) << PrettyMethod(m);
         const DexFile::CodeItem* code_item = m->GetCodeItem();
         // Can't be null or how would we compile its instructions?
@@ -2740,14 +2744,12 @@
         NativePcOffsetToReferenceMap map(native_gc_map);
         size_t num_regs = map.RegWidth() * 8;
         if (num_regs > 0) {
-          Runtime* runtime = Runtime::Current();
-          const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-          uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+          uintptr_t native_pc_offset =
+              GetCurrentCode().NativeQuickPcOffset(GetCurrentQuickFramePc());
           const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
           DCHECK(reg_bitmap != nullptr);
-          const void* code_pointer = ArtMethod::EntryPointToCodePointer(entry_point);
-          const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-          QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+          const VmapTable vmap_table(GetCurrentCode().GetVmapTable(sizeof(void*)));
+          QuickMethodFrameInfo frame_info = GetCurrentCode().GetQuickFrameInfo();
           // For all dex registers in the bitmap
           DCHECK(cur_quick_frame != nullptr);
           for (size_t reg = 0; reg < num_regs; ++reg) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 27dacea..40cd6d3 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -25,6 +25,7 @@
 #include <unistd.h>
 #include <memory>
 
+#include "art_code.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/stl_util.h"
@@ -1092,7 +1093,7 @@
 #endif
 
 void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix,
-    ArtMethod* current_method, void* ucontext_ptr) {
+    ArtMethod* current_method, ArtCode* current_code, void* ucontext_ptr) {
 #if __linux__
   // b/18119146
   if (RUNNING_ON_MEMORY_TOOL != 0) {
@@ -1148,8 +1149,8 @@
         try_addr2line = true;
       } else if (
           current_method != nullptr && Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          current_method->PcIsWithinQuickCode(it->pc)) {
-        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
+          current_code->PcIsWithinQuickCode(it->pc)) {
+        const void* start_of_code = current_code->GetQuickOatEntryPoint(sizeof(void*));
         os << JniLongName(current_method) << "+"
            << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
       } else {
@@ -1163,7 +1164,7 @@
     }
   }
 #else
-  UNUSED(os, tid, prefix, current_method, ucontext_ptr);
+  UNUSED(os, tid, prefix, current_method, current_code, ucontext_ptr);
 #endif
 }
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 19cc462..b67f273 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -33,6 +33,7 @@
 
 namespace art {
 
+class ArtCode;
 class ArtField;
 class ArtMethod;
 class DexFile;
@@ -221,7 +222,7 @@
 
 // Dumps the native stack for thread 'tid' to 'os'.
 void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix = "",
-    ArtMethod* current_method = nullptr, void* ucontext = nullptr)
+    ArtMethod* current_method = nullptr, ArtCode* current_code = nullptr, void* ucontext = nullptr)
     NO_THREAD_SAFETY_ANALYSIS;
 
 // Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
@@ -306,6 +307,14 @@
 void DumpMethodCFG(ArtMethod* method, std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os);
 
+static inline const void* EntryPointToCodePointer(const void* entry_point) {
+  uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
+  // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
+  //       least 2 byte aligned.
+  code &= ~0x1;
+  return reinterpret_cast<const void*>(code);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 285df18..f8d321c 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -22,11 +22,11 @@
 #define CHECK_REGS_CONTAIN_REFS(dex_pc, abort_if_not_found, ...) do { \
   int t[] = {__VA_ARGS__}; \
   int t_size = sizeof(t) / sizeof(*t); \
-  uintptr_t native_quick_pc = m->ToNativeQuickPc(dex_pc, \
+  uintptr_t native_quick_pc = GetCurrentCode().ToNativeQuickPc(dex_pc, \
                                                  /* is_catch_handler */ false, \
                                                  abort_if_not_found); \
   if (native_quick_pc != UINTPTR_MAX) { \
-    CheckReferences(t, t_size, m->NativeQuickPcOffset(native_quick_pc)); \
+    CheckReferences(t, t_size, GetCurrentCode().NativeQuickPcOffset(native_quick_pc)); \
   } \
 } while (false);
 
@@ -49,7 +49,7 @@
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
       CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentCode().IsOptimized(sizeof(void*))) {
         CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
@@ -65,7 +65,7 @@
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentCode().IsOptimized(sizeof(void*))) {
         // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
         CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);
         // v5 is removed from the root set because there is a "merge" operation.
@@ -74,7 +74,7 @@
       }
       CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
 
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentCode().IsOptimized(sizeof(void*))) {
         CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       }
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 2188055..d742b14 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -27,6 +27,13 @@
      */
     public static void main(String[] args) {
         System.loadLibrary(args[0]);
+        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
+            // Some tests ensure that the verifier was able to guarantee balanced locking by
+            // asserting that the test function is running as compiled code. But skip this now,
+            // as this seems to be a non-compiled code test configuration.
+            disableStackFrameAsserts();
+        }
+
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -49,7 +56,7 @@
         Object obj1 = new Object();
         Object obj2 = new Object();
 
-        m.twoPath(obj1, obj2, 0);
+        TwoPath.twoPath(obj1, obj2, 0);
         System.out.println("twoPath ok");
 
         m.triplet(obj1, obj2, 0);
@@ -62,6 +69,7 @@
      * Recursive synchronized method.
      */
     synchronized void recursiveSync(int iter) {
+        assertIsManaged();
         if (iter < 40) {
             recursiveSync(iter+1);
         } else {
@@ -73,6 +81,7 @@
      * Tests simple nesting, with and without a throw.
      */
     void nestedMayThrow(boolean doThrow) {
+        assertIsManaged();
         synchronized (this) {
             synchronized (Main.class) {
                 synchronized (new Object()) {
@@ -90,6 +99,7 @@
      * Exercises bug 3215458.
      */
     void constantLock() {
+        assertIsManaged();
         Class thing = Thread.class;
         synchronized (Thread.class) {}
     }
@@ -98,6 +108,7 @@
      * Confirms that we can have 32 nested monitors on one method.
      */
     void notExcessiveNesting() {
+        assertIsManaged();
         synchronized (this) {   // 1
         synchronized (this) {   // 2
         synchronized (this) {   // 3
@@ -138,6 +149,7 @@
      * method.
      */
     void notNested() {
+        assertIsManaged();
         synchronized (this) {}  // 1
         synchronized (this) {}  // 2
         synchronized (this) {}  // 3
@@ -178,25 +190,6 @@
     private void doNothing(Object obj) {}
 
     /**
-     * Conditionally uses one of the synchronized objects.
-     */
-    public void twoPath(Object obj1, Object obj2, int x) {
-        Object localObj;
-
-        synchronized (obj1) {
-            synchronized(obj2) {
-                if (x == 0) {
-                    localObj = obj2;
-                } else {
-                    localObj = obj1;
-                }
-            }
-        }
-
-        doNothing(localObj);
-    }
-
-    /**
      * Lock the monitor two or three times, and make use of the locked or
      * unlocked object.
      */
@@ -220,17 +213,12 @@
 
     // Smali testing code.
     private static void runSmaliTests() {
-        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
-            // Skip test, this seems to be a non-compiled code test configuration.
-            return;
-        }
-
         runTest("OK", new Object[] { new Object(), new Object() }, null);
         runTest("TooDeep", new Object[] { new Object() }, null);
         runTest("NotStructuredOverUnlock", new Object[] { new Object() },
                 IllegalMonitorStateException.class);
-        runTest("NotStructuredUnderUnlock", new Object[] { new Object() }, null);
-                // TODO: new IllegalMonitorStateException());
+        runTest("NotStructuredUnderUnlock", new Object[] { new Object() },
+                IllegalMonitorStateException.class);
         runTest("UnbalancedJoin", new Object[] { new Object(), new Object() }, null);
         runTest("UnbalancedStraight", new Object[] { new Object(), new Object() }, null);
     }
@@ -282,4 +270,5 @@
     public static native boolean hasOatFile();
     public static native boolean runtimeIsSoftFail();
     public static native boolean isInterpreted();
+    public static native void disableStackFrameAsserts();
 }
diff --git a/test/088-monitor-verification/src/TwoPath.java b/test/088-monitor-verification/src/TwoPath.java
new file mode 100644
index 0000000..2542de7
--- /dev/null
+++ b/test/088-monitor-verification/src/TwoPath.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+/*
+ * Test case for conditionally using one of two synchronized objects.
+ *
+ * This code cannot be verified at the moment, as the join point merges a register with two
+ * different lock options. Do not put it into Main to avoid the whole class being run in the
+ * interpreter.
+ */
+public class TwoPath {
+
+    /**
+     * Conditionally uses one of the synchronized objects.
+     */
+    public static void twoPath(Object obj1, Object obj2, int x) {
+        Object localObj;
+
+        synchronized (obj1) {
+            synchronized(obj2) {
+                if (x == 0) {
+                    localObj = obj2;
+                } else {
+                    localObj = obj1;
+                }
+            }
+        }
+
+        doNothing(localObj);
+    }
+
+    private static void doNothing(Object o) {
+    }
+}
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 9facfdb..0ee2ff9 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -15,6 +15,7 @@
  */
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
@@ -45,10 +46,14 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVReg(m, 1, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentCode().IsOptimized(sizeof(void*))) {
+        CHECK(!success);
+      }
 
       success = GetVReg(m, 2, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentCode().IsOptimized(sizeof(void*))) {
+        CHECK(!success);
+      }
 
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
@@ -78,10 +83,14 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVRegPair(m, 2, kLongLoVReg, kLongHiVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentCode().IsOptimized(sizeof(void*))) {
+        CHECK(!success);
+      }
 
       success = GetVRegPair(m, 4, kLongLoVReg, kLongHiVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentCode().IsOptimized(sizeof(void*))) {
+        CHECK(!success);
+      }
 
       uint32_t value32 = 0;
       CHECK(GetVReg(m, 6, kReferenceVReg, &value32));
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index c21168b..6fcebdb 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -15,6 +15,7 @@
  */
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
@@ -63,7 +64,9 @@
       CHECK_EQ(value, 1u);
 
       bool success = GetVReg(m, 2, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentCode().IsOptimized(sizeof(void*))) {
+        CHECK(!success);
+      }
 
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(value, 1u);
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 7e9a583..2a56a7f 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -15,6 +15,7 @@
  */
 
 #include "arch/context.h"
+#include "art_code.h"
 #include "art_method-inl.h"
 #include "jni.h"
 #include "scoped_thread_state_change.h"
@@ -43,7 +44,7 @@
       found_method_ = true;
       uint32_t value = 0;
       if (GetCurrentQuickFrame() != nullptr &&
-          m->IsOptimized(sizeof(void*)) &&
+          GetCurrentCode().IsOptimized(sizeof(void*)) &&
           !Runtime::Current()->IsDebuggable()) {
         CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false);
       } else {
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index a8e3884..1083c2f 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -31,9 +31,6 @@
       another) How about, always sort by name?
  * For long strings, limit the string length shown in the summary view to
    something reasonable.  Say 50 chars, then add a "..." at the end.
- * For string summaries, if the string is an offset into a bigger byte array,
-   make sure to show just the part that's in the bigger byte array, not the
-   entire byte array.
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
  * Should help be part of an AhatHandler, that automatically gets the menu and
@@ -72,6 +69,8 @@
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
+ * That InstanceUtils.asString properly takes into account "offset" and
+   "count" fields, if they are present.
 
 Reported Issues:
  * Request to be able to sort tables by size.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index a6ac3b8..eb9e363 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -37,22 +37,6 @@
   }
 
   /**
-   * Read the char[] value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a char[].
-   */
-  private static char[] asCharArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
-      return null;
-    }
-
-    ArrayInstance array = (ArrayInstance) inst;
-    if (array.getArrayType() != Type.CHAR) {
-      return null;
-    }
-    return array.asCharArray(0, array.getValues().length);
-  }
-
-  /**
    * Read the byte[] value from an hprof Instance.
    * Returns null if the instance is not a byte array.
    */
@@ -82,8 +66,32 @@
     if (!isInstanceOfClass(inst, "java.lang.String")) {
       return null;
     }
-    char[] value = getCharArrayField(inst, "value");
-    return (value == null) ? null : new String(value);
+
+    Object value = getField(inst, "value");
+    if (!(value instanceof ArrayInstance)) {
+      return null;
+    }
+
+    ArrayInstance chars = (ArrayInstance) value;
+    if (chars.getArrayType() != Type.CHAR) {
+      return null;
+    }
+
+    // TODO: When perflib provides a better way to get the length of the
+    // array, we should use that here.
+    int numChars = chars.getValues().length;
+    int count = getIntField(inst, "count", numChars);
+    int offset = getIntField(inst, "offset", 0);
+    int end = offset + count - 1;
+
+    if (count == 0) {
+      return "";
+    }
+
+    if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
+      return new String(chars.asCharArray(offset, count));
+    }
+    return null;
   }
 
   /**
@@ -175,6 +183,15 @@
   }
 
   /**
+   * Read an int field of an instance, returning a default value if the field
+   * was not an int or could not be read.
+   */
+  private static int getIntField(Instance inst, String fieldName, int def) {
+    Integer value = getIntField(inst, fieldName);
+    return value == null ? def : value;
+  }
+
+  /**
    * Read the given field from the given instance.
    * The field is assumed to be a byte[] field.
    * Returns null if the field value is null, not a byte[] or could not be read.
@@ -187,14 +204,6 @@
     return asByteArray((Instance)value);
   }
 
-  private static char[] getCharArrayField(Instance inst, String fieldName) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Instance)) {
-      return null;
-    }
-    return asCharArray((Instance)value);
-  }
-
   // Return the bitmap instance associated with this object, or null if there
   // is none. This works for android.graphics.Bitmap instances and their
   // underlying Byte[] instances.