Merge "Fix interaction between JIT and instrumentation."
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ed193c7..676e564 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -359,18 +359,10 @@
           // need a strategy for splitting exceptional edges. We split the block
           // after the move-exception (if present) and mark the first part not
           // throwing. The normal-flow edge between them will be split later.
-          HInstruction* first_insn = block->GetFirstInstruction();
-          if (first_insn->IsLoadException()) {
-            // Catch block starts with a LoadException. Split the block after
-            // the StoreLocal and ClearException which must come after the load.
-            DCHECK(first_insn->GetNext()->IsStoreLocal());
-            DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-            throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
-          } else {
-            // Catch block does not load the exception. Split at the beginning
-            // to create an empty catch block.
-            throwing_block = block->SplitBefore(first_insn);
-          }
+          throwing_block = block->SplitCatchBlockAfterMoveException();
+          // Move-exception does not throw and the block has throwing instructions,
+          // so it must have been possible to split it.
+          DCHECK(throwing_block != nullptr);
         }
 
         try_block_info.Put(throwing_block->GetBlockId(),
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8106499..959adb4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,12 +40,8 @@
 static constexpr Register kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr Register TMP = T8;
 static constexpr FRegister FTMP = F8;
 
-// ART Thread Register.
-static constexpr Register TR = S1;
-
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index d4fcaf9..5e81c5f 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,13 +16,13 @@
 
 #include "code_generator_mips64.h"
 
+#include "art_method.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_mips64.h"
-#include "art_method.h"
-#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
@@ -666,9 +666,19 @@
         gpr = destination.AsRegister<GpuRegister>();
       }
       if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
-        __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+        int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+        int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst64(gpr, value);
+        }
       }
       if (dst_type == Primitive::kPrimFloat) {
         __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +744,22 @@
       // Move to stack from constant
       HConstant* src_cst = source.GetConstant();
       StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      GpuRegister gpr = ZERO;
       if (destination.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+        int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+        DCHECK(destination.IsDoubleStackSlot());
+        int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
       }
-      __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+      __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
       DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +775,7 @@
   }
 }
 
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
-                                        Location loc2,
-                                        Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
   DCHECK(!loc1.IsConstant());
   DCHECK(!loc2.IsConstant());
 
@@ -781,12 +799,16 @@
     // Swap 2 FPRs
     FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
     FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
-    // TODO: Can MOV.S/MOV.D be used here to save one instruction?
-    // Need to distinguish float from double, right?
-    __ Dmfc1(TMP, r2);
-    __ Dmfc1(AT, r1);
-    __ Dmtc1(TMP, r1);
-    __ Dmtc1(AT, r2);
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, r1);
+      __ MovS(r1, r2);
+      __ MovS(r2, FTMP);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, r1);
+      __ MovD(r1, r2);
+      __ MovD(r2, FTMP);
+    }
   } else if (is_slot1 != is_slot2) {
     // Swap GPR/FPR and stack slot
     Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +822,6 @@
                           reg_loc.AsFpuRegister<FpuRegister>(),
                           SP,
                           mem_loc.GetStackIndex());
-      // TODO: review this MTC1/DMTC1 move
       if (mem_loc.IsStackSlot()) {
         __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
       } else {
@@ -845,12 +866,22 @@
     } else {
       DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
       // Move to stack from constant
+      GpuRegister gpr = ZERO;
       if (location.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
-        __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+        int32_t value = GetInt32ValueOf(instruction->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
+        __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
       } else {
-        __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
-        __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+        DCHECK(location.IsDoubleStackSlot());
+        int64_t value = instruction->AsLongConstant()->GetValue();
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
+        __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
       }
     }
   } else if (instruction->IsTemporary()) {
@@ -1198,7 +1229,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -1707,7 +1738,7 @@
   switch (in_type) {
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1736,8 +1767,18 @@
     case Primitive::kPrimLong: {
       GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      // TODO: more efficient (direct) comparison with a constant
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      GpuRegister rhs = ZERO;
+      if (use_imm) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (value != 0) {
+          rhs = AT;
+          __ LoadConst64(rhs, value);
+        }
+      } else {
+        rhs = rhs_location.AsRegister<GpuRegister>();
+      }
       __ Slt(TMP, lhs, rhs);
       __ Slt(dst, rhs, lhs);
       __ Subu(dst, dst, TMP);
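
Note: the Slt/Slt/Subu sequence above is a branchless three-way compare; it
computes (rhs < lhs) - (lhs < rhs), i.e. the sign of lhs - rhs without risking
overflow in a subtraction. A minimal C++ sketch of the idiom (names are
illustrative, not from the source):

    int32_t compare_long(int64_t lhs, int64_t rhs) {
      int32_t lt = (lhs < rhs) ? 1 : 0;  // Slt(TMP, lhs, rhs)
      int32_t gt = (rhs < lhs) ? 1 : 0;  // Slt(dst, rhs, lhs)
      return gt - lt;                    // Subu(dst, dst, TMP)
    }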
@@ -1902,6 +1943,252 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      if (type == Primitive::kPrimInt) {
+        __ Subu(out, ZERO, dividend);
+      } else {
+        DCHECK_EQ(type, Primitive::kPrimLong);
+        __ Dsubu(out, ZERO, dividend);
+      }
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+
+  if (instruction->IsDiv()) {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Srl(TMP, dividend, 31);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+      }
+      __ Addu(out, dividend, TMP);
+      __ Sra(out, out, ctz_imm);
+      if (imm < 0) {
+        __ Subu(out, ZERO, out);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Dsrl32(TMP, dividend, 31);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+      }
+      __ Daddu(out, dividend, TMP);
+      if (ctz_imm < 32) {
+        __ Dsra(out, out, ctz_imm);
+      } else {
+        __ Dsra32(out, out, ctz_imm - 32);
+      }
+      if (imm < 0) {
+        __ Dsubu(out, ZERO, out);
+      }
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Sra(TMP, dividend, 31);
+        __ Subu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Addu(out, out, TMP);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+        __ Addu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          __ Sll(out, out, 32 - ctz_imm);
+          __ Srl(out, out, 32 - ctz_imm);
+        }
+        __ Subu(out, out, TMP);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Dsra32(TMP, dividend, 31);
+        __ Dsubu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Daddu(out, out, TMP);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+        __ Daddu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          if (ctz_imm > 32) {
+            __ Dsll(out, out, 64 - ctz_imm);
+            __ Dsrl(out, out, 64 - ctz_imm);
+          } else {
+            __ Dsll32(out, out, 32 - ctz_imm);
+            __ Dsrl32(out, out, 32 - ctz_imm);
+          }
+        }
+        __ Dsubu(out, out, TMP);
+      }
+    }
+  }
+}
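
Note: the shift-based paths above implement the standard round-toward-zero
correction for signed division by 2^k: add 2^k - 1 to negative dividends
before the arithmetic shift. A hedged C++ sketch of the 32-bit div case
(function name and the arithmetic-shift-of-negatives behavior are assumptions
of the sketch, not part of the source):

    int32_t div_pow2(int32_t dividend, int ctz_imm) {
      // Sra/Srl pair: 2^ctz_imm - 1 for negative dividends, 0 otherwise.
      int32_t bias = static_cast<uint32_t>(dividend >> 31) >> (32 - ctz_imm);
      return (dividend + bias) >> ctz_imm;  // Addu + Sra
    }

For ctz_imm == 1 the bias is just the sign bit, which is why the fast path can
fetch it with a single Srl(TMP, dividend, 31).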
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm,
+                                  (type == Primitive::kPrimLong),
+                                  &magic,
+                                  &shift);
+
+  if (type == Primitive::kPrimInt) {
+    __ LoadConst32(TMP, magic);
+    __ MuhR6(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Addu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Subu(TMP, TMP, dividend);
+    }
+
+    if (shift != 0) {
+      __ Sra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Sra(out, TMP, 31);
+      __ Subu(out, TMP, out);
+    } else {
+      __ Sra(AT, TMP, 31);
+      __ Subu(AT, TMP, AT);
+      __ LoadConst32(TMP, imm);
+      __ MulR6(TMP, AT, TMP);
+      __ Subu(out, dividend, TMP);
+    }
+  } else {
+    __ LoadConst64(TMP, magic);
+    __ Dmuh(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Daddu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Dsubu(TMP, TMP, dividend);
+    }
+
+    if (shift >= 32) {
+      __ Dsra32(TMP, TMP, shift - 32);
+    } else if (shift > 0) {
+      __ Dsra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Dsra32(out, TMP, 31);
+      __ Dsubu(out, TMP, out);
+    } else {
+      __ Dsra32(AT, TMP, 31);
+      __ Dsubu(AT, TMP, AT);
+      __ LoadConst64(TMP, imm);
+      __ Dmul(TMP, AT, TMP);
+      __ Dsubu(out, dividend, TMP);
+    }
+  }
+}
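
Note: GenerateDivRemWithAnyConstant uses the classic magic-number technique
(cf. Hacker's Delight): the quotient is recovered from the high half of a
widening multiply, an optional dividend correction, and a shift. A hedged C++
sketch of the 32-bit div path, assuming magic and shift come from
CalculateMagicAndShiftForDivRem (names here are illustrative):

    int32_t div_magic(int32_t dividend, int32_t imm, int32_t magic, int shift) {
      // MuhR6: high 32 bits of the 64-bit product.
      int32_t tmp = static_cast<int32_t>(
          (static_cast<int64_t>(magic) * dividend) >> 32);
      if (imm > 0 && magic < 0) tmp += dividend;  // Addu correction
      if (imm < 0 && magic > 0) tmp -= dividend;  // Subu correction
      tmp >>= shift;                              // Sra
      return tmp - (tmp >> 31);                   // add 1 back for negatives
    }

The rem path reuses the same quotient and computes dividend - quotient * imm.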
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(std::abs(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+    GpuRegister divisor = second.AsRegister<GpuRegister>();
+    if (instruction->IsDiv()) {
+      if (type == Primitive::kPrimInt) {
+        __ DivR6(out, dividend, divisor);
+      } else {
+        __ Ddiv(out, dividend, divisor);
+      }
+    } else {
+      if (type == Primitive::kPrimInt) {
+        __ ModR6(out, dividend, divisor);
+      } else {
+        __ Dmod(out, dividend, divisor);
+      }
+    }
+  }
+}
+
 void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1909,7 +2196,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1931,16 +2218,9 @@
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ DivR6(dst, lhs, rhs);
-      else
-        __ Ddiv(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -3112,7 +3392,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3132,20 +3412,12 @@
 
 void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ ModR6(dst, lhs, rhs);
-      else
-        __ Dmod(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 4f91c71..58c6e0f 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -230,6 +230,10 @@
                              Label* true_target,
                              Label* false_target,
                              Label* always_true_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0db5837..8308d9e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4062,16 +4062,16 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(1, Location::RegisterLocation(EAX));
   } else if (Primitive::IsFloatingPointType(field_type)) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
+    if (is_volatile && field_type == Primitive::kPrimDouble) {
+      // In order to satisfy the semantics of volatile, this must be a single-instruction store.
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
+  } else if (is_volatile && field_type == Primitive::kPrimLong) {
+    // In order to satisfy the semantics of volatile, this must be a single-instruction store.
     locations->SetInAt(1, Location::RequiresRegister());
-  }
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
-  } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+
     // 64bits value can be atomically written to an address with movsd and an XMM register.
     // We need two XMM registers because there's no easier way to (bit) copy a register pair
     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
@@ -4079,6 +4079,15 @@
     // isolated cases when we need this it isn't worth adding the extra complexity.
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+
+    if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
+      // Ensure the card is in a byte register.
+      locations->AddTemp(Location::RegisterLocation(ECX));
+    }
   }
 }
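
Note: a 64-bit volatile store on x86-32 must be a single instruction so that
no observer can see a half-written value; the two FPU temps let the codegen
assemble a register pair into one XMM register and store it in one go. A
hedged C++ sketch of the equivalent interleave-and-store using SSE2
intrinsics (the generated code uses punpckldq + movsd; movq below is the
analogous single 64-bit store, and the function name is illustrative):

    #include <emmintrin.h>
    #include <cstdint>
    void store_volatile_i64(int64_t* addr, uint32_t lo, uint32_t hi) {
      __m128i xlo = _mm_cvtsi32_si128(static_cast<int>(lo));   // movd
      __m128i xhi = _mm_cvtsi32_si128(static_cast<int>(hi));   // movd
      __m128i v = _mm_unpacklo_epi32(xlo, xhi);                // punpckldq
      _mm_storel_epi64(reinterpret_cast<__m128i*>(addr), v);   // one 64-bit store
    }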
 
@@ -4100,6 +4109,8 @@
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
@@ -4109,7 +4120,12 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<Register>());
+      if (value.IsConstant()) {
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<Register>());
+      }
       break;
     }
 
@@ -4124,6 +4140,9 @@
         __ movl(temp, value.AsRegister<Register>());
         __ PoisonHeapReference(temp);
         __ movl(Address(base, offset), temp);
+      } else if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
       } else {
         __ movl(Address(base, offset), value.AsRegister<Register>());
       }
@@ -4139,21 +4158,40 @@
         __ punpckldq(temp1, temp2);
         __ movsd(Address(base, offset), temp1);
         codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
       } else {
         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
       }
+      maybe_record_implicit_null_check_done = true;
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -4162,8 +4200,7 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (!maybe_record_implicit_null_check_done) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -4500,7 +4537,7 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
   } else if (Primitive::IsFloatingPointType(value_type)) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
@@ -4686,8 +4723,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4696,8 +4739,19 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        Address address_hi = index.IsConstant() ?
+            Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                           offset + kX86WordSize) :
+            Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        __ movl(address, Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(address_hi, Immediate(High32Bits(v)));
+      }
       break;
     }
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3de96b5..c32ef51 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -188,6 +188,21 @@
   VisitInstruction(try_boundary);
 }
 
+void GraphChecker::VisitLoadException(HLoadException* load) {
+  // Ensure that LoadException is the first instruction in a catch block.
+  if (!load->GetBlock()->IsCatchBlock()) {
+    AddError(StringPrintf("%s:%d is in a non-catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  } else if (load->GetBlock()->GetFirstInstruction() != load) {
+    AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  }
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index abf3659..d5ddbab 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -50,6 +50,9 @@
   // Check successors of blocks ending in TryBoundary.
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
 
+  // Check that LoadException is the first instruction in a catch block.
+  void VisitLoadException(HLoadException* load) OVERRIDE;
+
   // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dbe7524..b01324e 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -89,10 +89,7 @@
   }
 }
 
-static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips) {
-    return Intrinsics::kNone;
-  }
+static Intrinsics GetIntrinsic(InlineMethod method) {
   switch (method.opcode) {
     // Floating-point conversions.
     case kIntrinsicDoubleCvt:
@@ -431,7 +428,7 @@
         DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
         DCHECK(inliner != nullptr);
         if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
-          Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet());
+          Intrinsics intrinsic = GetIntrinsic(method);
 
           if (intrinsic != Intrinsics::kNone) {
             if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5efcf4e..a94e3a8 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -138,6 +138,108 @@
 
 #define __ assembler->
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  MipsLabel loop;
+  MipsLabel end;
+  MipsLabel return_true;
+  MipsLabel return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can short-circuit
+  // the logic to a true result.
+  if (str == arg) {
+    __ LoadConst32(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqz(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beq(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bne(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bne(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqz(temp1, &return_true);
+
+  // Don't overwrite input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 2 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded.
+  __ Bind(&loop);
+  __ Lw(out, TMP, value_offset);
+  __ Lw(temp2, temp3, value_offset);
+  __ Bne(out, temp2, &return_false);
+  __ Addiu(TMP, TMP, 4);
+  __ Addiu(temp3, temp3, 4);
+  __ Addiu(temp1, temp1, -2);
+  __ Bgtz(temp1, &loop);
+
+  // Return true and exit the function.
+  // If the loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst32(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst32(out, 0);
+  __ Bind(&end);
+}
+
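
Note: the loop above compares two characters (one 32-bit word) per iteration.
This is safe because string char arrays are zero-padded up to the object
alignment, so an odd-length tail reads a deterministic zero in both strings.
A hedged C++ sketch of the loop (names are illustrative; memcpy sidesteps
aliasing, and reading one char past count relies on the padding, as the
assembly does):

    #include <cstdint>
    #include <cstring>
    bool values_equal(const uint16_t* a, const uint16_t* b, int32_t count) {
      for (int32_t i = 0; i < count; i += 2) {  // Addiu(temp1, temp1, -2)
        uint32_t wa, wb;
        std::memcpy(&wa, a + i, sizeof(wa));    // Lw(out, TMP, value_offset)
        std::memcpy(&wb, b + i, sizeof(wb));    // Lw(temp2, temp3, value_offset)
        if (wa != wb) return false;             // Bne(out, temp2, &return_false)
      }
      return true;
    }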
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -204,7 +306,6 @@
 UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(StringCharAt)
 UNIMPLEMENTED_INTRINSIC(StringCompareTo)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 68fb0ac..af3d8f4 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -335,14 +335,24 @@
       // instructions into `normal_block` and links the two blocks with a Goto.
       // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
       // leaving `catch_block` with the exceptional edges only.
+      //
       // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
-      DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
-      HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
-      for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-        if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-          --j;
+      // a move-exception instruction, as guaranteed by the verifier. However,
+      // trivially dead predecessors are ignored by the verifier and such code
+      // has not been removed at this stage. We therefore ignore the assumption
+      // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628).
+      HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
+      if (normal_block == nullptr) {
+        // Catch block is either empty or only contains a move-exception. It must
+        // therefore be dead and will be removed during initial DCE. Do nothing.
+        DCHECK(!catch_block->EndsWithControlFlowInstruction());
+      } else {
+        // Catch block was split. Re-link normal-flow edges to the new block.
+        for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
+          if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+            catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
+            --j;
+          }
         }
       }
     }
@@ -1163,7 +1173,7 @@
 }
 
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
@@ -1193,7 +1203,7 @@
 }
 
 HBasicBlock* HBasicBlock::CreateImmediateDominator() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
@@ -1209,6 +1219,34 @@
   return new_block;
 }
 
+HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+  DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
+
+  HInstruction* first_insn = GetFirstInstruction();
+  HInstruction* split_before = nullptr;
+
+  if (first_insn != nullptr && first_insn->IsLoadException()) {
+    // Catch block starts with a LoadException. Split the block after
+    // the StoreLocal and ClearException which must come after the load.
+    DCHECK(first_insn->GetNext()->IsStoreLocal());
+    DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
+    split_before = first_insn->GetNext()->GetNext()->GetNext();
+  } else {
+    // Catch block does not load the exception. Split at the beginning
+    // to create an empty catch block.
+    split_before = first_insn;
+  }
+
+  if (split_before == nullptr) {
+    // Catch block has no instructions after the split point (it must be dead).
+    // Do not split it; signal the error by returning nullptr instead.
+    return nullptr;
+  } else {
+    return SplitBefore(split_before);
+  }
+}
+
 HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f2c1cf..ddd39a3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -837,6 +837,15 @@
   // blocks are consistent (for example ending with a control flow instruction).
   HBasicBlock* SplitAfter(HInstruction* cursor);
 
+  // Split catch block into two blocks after the original move-exception bytecode
+  // instruction, or at the beginning if it is not present. Returns the newly
+  // created latter block, or nullptr if such a block could not be created (it
+  // must be dead in that case). Note that this method just updates raw block
+  // information, like predecessors, successors, dominators, and instruction list.
+  // It does not update the graph, reverse post order, loop information, nor make
+  // sure the blocks are consistent (for example ending with a control flow
+  // instruction).
+  HBasicBlock* SplitCatchBlockAfterMoveException();
+
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
   // that this method does not update the graph, reverse post order, loop
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995..ba2525e 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -117,14 +117,6 @@
   Emit(encoding);
 }
 
-void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
-void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
@@ -141,10 +133,6 @@
   EmitI(0x19, rs, rt, imm16);
 }
 
-void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
@@ -153,50 +141,14 @@
   EmitR(0, rs, rt, rd, 0, 0x2f);
 }
 
-void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
-}
-
-void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
-}
-
-void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0x1c, rs, rt, rd, 0, 2);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mfhi(rd);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mfhi(rd);
-}
-
 void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void Mips64Assembler::MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1a);
 }
@@ -217,6 +169,10 @@
   EmitR(0, rs, rt, rd, 2, 0x1c);
 }
 
+void Mips64Assembler::Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1c);
+}
+
 void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1e);
 }
@@ -440,14 +396,6 @@
            static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
 }
 
-void Mips64Assembler::Mfhi(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
-}
-
-void Mips64Assembler::Mflo(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
-}
-
 void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x28, rs, rt, imm16);
 }
@@ -892,45 +840,58 @@
   } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
     Lui(rd, value >> 16);
     Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      Dsrl(rd, rd, shift_cnt);
+    } else {
+      Dsrl32(rd, rd, shift_cnt & 31);
+    }
   } else {
     int shift_cnt = CTZ(value);
     int64_t tmp = value >> shift_cnt;
     if (IsUint<16>(tmp)) {
       Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<16>(tmp)) {
       Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<32>(tmp)) {
       // Loads with 3 instructions.
       Lui(rd, tmp >> 16);
       Ori(rd, rd, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else {
       shift_cnt = 16 + CTZ(value >> 16);
       tmp = value >> shift_cnt;
       if (IsUint<16>(tmp)) {
         Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else if (IsInt<16>(tmp)) {
         Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else {
         // Loads with 3-4 instructions.
@@ -941,10 +902,11 @@
           used_lui = true;
         }
         if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui)
+          if (used_lui) {
             Ori(rd, rd, tmp2);
-          else
+          } else {
             Ori(rd, ZERO, tmp2);
+          }
         }
         if (bit31) {
           tmp2 += UINT64_C(0x100000000);
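
Note: the new IsPowerOfTwo(value + 1) branch in LoadConst64 covers 64-bit
masks of the form 2^n - 1 in two instructions where the generic fall-through
would need more. A hedged C++ sketch of what the Daddiu/Dsrl pair
materializes (function name is illustrative):

    #include <cstdint>
    uint64_t load_low_mask(int shift_cnt) {   // shift_cnt == 64 - CTZ(value + 1)
      uint64_t rd = ~UINT64_C(0);             // Daddiu(rd, ZERO, -1): all ones
      return rd >> shift_cnt;                 // Dsrl/Dsrl32 clears the top bits
    }

For example, value == 0x0000FFFFFFFFFFFF gives shift_cnt == 16, and the mask
falls out of an all-ones register shifted right by 16.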
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 33f22d2..42962bc 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -66,35 +66,25 @@
   virtual ~Mips64Assembler() {}
 
   // Emit Machine Instructions.
-  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
-  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
-  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -104,12 +94,12 @@
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Bitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Dbitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
-  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dsbh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dshd(GpuRegister rd, GpuRegister rt);  // R2+
+  void Bitswap(GpuRegister rd, GpuRegister rt);
+  void Dbitswap(GpuRegister rd, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);
+  void Seh(GpuRegister rd, GpuRegister rt);
+  void Dsbh(GpuRegister rd, GpuRegister rt);
+  void Dshd(GpuRegister rd, GpuRegister rt);
   void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -146,11 +136,9 @@
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
-  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
-  void Mfhi(GpuRegister rd);  // R2
-  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -175,21 +163,21 @@
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
-  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
-  void Jic(GpuRegister rt, uint16_t imm16);  // R6
-  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
-  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
+  void Auipc(GpuRegister rs, uint16_t imm16);
+  void Jic(GpuRegister rt, uint16_t imm16);
+  void Jialc(GpuRegister rt, uint16_t imm16);
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bltzc(GpuRegister rt, uint16_t imm16);
+  void Bgtzc(GpuRegister rt, uint16_t imm16);
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgezc(GpuRegister rt, uint16_t imm16);
+  void Blezc(GpuRegister rt, uint16_t imm16);
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqzc(GpuRegister rs, uint32_t imm21);
+  void Bnezc(GpuRegister rs, uint32_t imm21);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -259,25 +247,25 @@
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;  // R6
+  void Bind(Label* label) OVERRIDE;
   void Jump(Label* label) OVERRIDE {
     B(label);
   }
-  void B(Label* label);  // R6
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
+  void B(Label* label);
+  void Jalr(Label* label, GpuRegister indirect_reg = RA);
   // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bltzc(GpuRegister rt, Label* label);  // R6
-  void Bgtzc(GpuRegister rt, Label* label);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgezc(GpuRegister rt, Label* label);  // R6
-  void Blezc(GpuRegister rt, Label* label);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqzc(GpuRegister rs, Label* label);  // R6
-  void Bnezc(GpuRegister rs, Label* label);  // R6
+  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bltzc(GpuRegister rt, Label* label);
+  void Bgtzc(GpuRegister rt, Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgezc(GpuRegister rt, Label* label);
+  void Blezc(GpuRegister rt, Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqzc(GpuRegister rs, Label* label);
+  void Bnezc(GpuRegister rs, Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 0f784ed..1096af0 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -59,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   FP   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register
+  TMP  = T8,  // scratch register (in addition to AT)
   kNumberOfCoreRegisters = 32,
   kNoRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 514d3cc..5dac95d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -153,7 +153,7 @@
   self->AssertPendingException();
 }
 
-void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) {
+void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -178,10 +178,15 @@
     self->SetException(pre_allocated);
   } else {
     if (c->GetVerifyError() != nullptr) {
+      // Rethrow stored error.
       HandleEarlierVerifyError(self, this, c);
-    } else {
-      self->ThrowNewException("Ljava/lang/NoClassDefFoundError;",
-                              PrettyDescriptor(c).c_str());
+    }
+    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+      // If there is no recorded earlier error, or this is a repeat throw from initialization,
+      // the top-level exception must be a NoClassDefFoundError. The potentially already pending
+      // exception will become its cause.
+      self->ThrowNewWrappedException("Ljava/lang/NoClassDefFoundError;",
+                                     PrettyDescriptor(c).c_str());
     }
   }
 }
@@ -3430,7 +3435,7 @@
 
     // Was the class already found to be erroneous? Done under the lock to match the JLS.
     if (klass->IsErroneous()) {
-      ThrowEarlierClassFailure(klass.Get());
+      ThrowEarlierClassFailure(klass.Get(), true);
       VlogClassInitializationFailure(klass);
       return false;
     }
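The visible effect is Java-style cause chaining: the originally recorded error becomes getCause() of the NoClassDefFoundError, which the updated 008-exceptions expectations further down rely on. As an analogy only (standard C++ exception nesting, not ART's Thread API), the wrap-and-rethrow shape looks like this:

```cpp
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>

// Analogy, not ART code: wrap the in-flight error so it survives as the
// cause of the new top-level exception, like ThrowNewWrappedException.
void Initialize() {
  try {
    throw std::runtime_error("BadError: This is bad by convention: BadInit");
  } catch (...) {
    std::throw_with_nested(std::runtime_error("NoClassDefFoundError: BadInit"));
  }
}

// Walk the cause chain, the way the test below calls e.getCause().
void PrintChain(const std::exception& e, int depth = 0) {
  std::cout << std::string(depth * 2, ' ') << e.what() << '\n';
  try {
    std::rethrow_if_nested(e);
  } catch (const std::exception& cause) {
    PrintChain(cause, depth + 1);
  }
}

int main() {
  try {
    Initialize();
  } catch (const std::exception& e) {
    PrintChain(e);  // Prints the top-level error, then its cause.
  }
  return 0;
}
```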
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 73f9d4b..a35ba3e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -848,9 +848,7 @@
 
   // Throw the class initialization failure recorded when first trying to initialize the given
   // class.
-  // Note: Currently we only store the descriptor, so we cannot throw the exact throwable, only
-  //       a recreation with a custom string.
-  void ThrowEarlierClassFailure(mirror::Class* c)
+  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a25d003..13d0b84 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4069,13 +4069,15 @@
   if (is_constructor) {
     // If we invoked a constructor (which actually returns void), return the receiver,
     // unless we threw, in which case we return null.
-    result_tag = JDWP::JT_OBJECT;
+    DCHECK_EQ(JDWP::JT_VOID, result_tag);
     if (exceptionObjectId == 0) {
       // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
       // object registry.
       result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+      result_tag = TagFromObject(soa, pReq->receiver.Read());
     } else {
       result_value = 0;
+      result_tag = JDWP::JT_OBJECT;
     }
   }
 
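Distilled, the fix defers tagging until after the exception check and derives the tag from the actual receiver via TagFromObject, so a String receiver reports JT_STRING instead of a blanket JT_OBJECT. A hedged sketch with stand-in types (the real JDWP tag values and ObjectRegistry differ):

```cpp
#include <cassert>
#include <cstdint>

// Stand-ins only; not the real JDWP definitions.
enum Tag : char { JT_VOID = 'V', JT_OBJECT = 'L', JT_STRING = 's' };

struct InvokeResult {
  Tag tag;
  uint64_t value;  // ObjectId of the receiver, or 0 after an exception.
};

// Mirrors the branch above: on an exception, return a null JT_OBJECT;
// otherwise return the receiver with its precise tag.
InvokeResult ConstructorResult(bool threw, Tag receiver_tag, uint64_t receiver_id) {
  if (threw) {
    return {JT_OBJECT, 0};
  }
  return {receiver_tag, receiver_id};
}

int main() {
  assert(ConstructorResult(true, JT_STRING, 42).value == 0);
  assert(ConstructorResult(false, JT_STRING, 42).tag == JT_STRING);
  return 0;
}
```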
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 234a733..415109f 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1670,7 +1670,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -1684,7 +1684,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2473,7 +2473,7 @@
                                                               "GetPrimitiveArrayRegion",
                                                               "get region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "src");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2493,7 +2493,7 @@
                                                               "SetPrimitiveArrayRegion",
                                                               "set region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "dst");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
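The rewritten predicate is overflow-safe: once start and length are known to be non-negative, length > GetLength() - start cannot wrap, while the old start + length overflows for values near INT32_MAX (signed overflow is undefined behavior in C++; on two's-complement hardware it wraps negative, which is why the old check passed). A minimal model of both checks, with the wraparound made explicit through unsigned arithmetic:

```cpp
#include <cassert>
#include <cstdint>

// Wrapped-arithmetic model of the old check. Computing in uint32_t and
// casting back models what two's-complement hardware did.
bool OldCheckRejects(int32_t start, int32_t length, int32_t len) {
  int32_t sum = static_cast<int32_t>(
      static_cast<uint32_t>(start) + static_cast<uint32_t>(length));
  return start < 0 || length < 0 || sum > len;
}

// The fixed check: after the sign tests, len - start cannot overflow, and a
// too-large start makes the right-hand side negative, which always rejects.
bool NewCheckRejects(int32_t start, int32_t length, int32_t len) {
  return start < 0 || length < 0 || length > len - start;
}

int main() {
  // The regression inputs from the tests below: 0x7fffffff + 0x7fffffff
  // wraps to -2, so the old check accepted a wildly out-of-range region.
  assert(!OldCheckRejects(0x7fffffff, 0x7fffffff, 10));
  assert(NewCheckRejects(0x7fffffff, 0x7fffffff, 10));
  assert(!NewCheckRejects(2, 8, 10));  // In-range regions still pass.
  return 0;
}
```

The tests added to jni_internal_test.cc below exercise exactly this 0x7fffffff/0x7fffffff case for both array and string regions.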
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 41b368e..649df5f 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1077,6 +1077,12 @@
   env_->set_region_fn(a, size - 1, size, nullptr); \
   ExpectException(aioobe_); \
   \
+  /* Regression test against integer overflow in range check. */ \
+  env_->get_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  \
   /* It's okay for the buffer to be null as long as the length is 0. */ \
   env_->get_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
@@ -1507,6 +1513,9 @@
   ExpectException(sioobe_);
   env_->GetStringRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   jchar chars[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringRegion(s, 1, 2, &chars[1]);
@@ -1529,6 +1538,9 @@
   ExpectException(sioobe_);
   env_->GetStringUTFRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringUTFRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   char bytes[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringUTFRegion(s, 1, 2, &bytes[1]);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 77275f0..91e1cec 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -94,42 +94,9 @@
       }
     }
 
-    // Stash current exception.
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
-    CHECK(old_exception.Get() != nullptr);
-    Class* eiie_class;
-    // Do't attempt to use FindClass if we have an OOM error since this can try to do more
-    // allocations and may cause infinite loops.
-    bool throw_eiie = (old_exception.Get() == nullptr);
-    if (!throw_eiie) {
-      std::string temp;
-      const char* old_exception_descriptor = old_exception->GetClass()->GetDescriptor(&temp);
-      throw_eiie = (strcmp(old_exception_descriptor, "Ljava/lang/OutOfMemoryError;") != 0);
-    }
-    if (throw_eiie) {
-      // Clear exception to call FindSystemClass.
-      self->ClearException();
-      eiie_class = Runtime::Current()->GetClassLinker()->FindSystemClass(
-          self, "Ljava/lang/ExceptionInInitializerError;");
-      CHECK(!self->IsExceptionPending());
-      // Only verification errors, not initialization problems, should set a verify error.
-      // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that
-      // case.
-      Class* exception_class = old_exception->GetClass();
-      if (!eiie_class->IsAssignableFrom(exception_class)) {
-        // Store the exception class when this is the AoT compiler. Don't store full exceptions,
-        // as they need to be trimmed (native components are not storable in an image).
-        if (Runtime::Current()->IsAotCompiler()) {
-          h_this->SetVerifyError(exception_class);
-        } else {
-          h_this->SetVerifyError(old_exception.Get());
-        }
-      }
-    }
-
-    // Restore exception.
-    self->SetException(old_exception.Get());
+    // Remember the current exception.
+    CHECK(self->GetException() != nullptr);
+    h_this->SetVerifyError(self->GetException());
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 48dce63..68db7e3 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1434,7 +1434,8 @@
     execv(program, &args[0]);
 
     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-    exit(1);
+    // Use _exit() so the child does not run atexit handlers inherited from the parent.
+    _exit(1);
   } else {
     if (pid == -1) {
       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
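The distinction matters because the forked child inherits the parent's atexit registrations and stdio buffers; exit(3) would run them in the child (potentially flushing buffered output a second time), while _exit(2) terminates immediately. A minimal sketch of the fork/execv/_exit pattern (plain POSIX, not the ART helper itself):

```cpp
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdio>

int RunProgram(const char* program, char* const argv[]) {
  pid_t pid = fork();
  if (pid == 0) {
    execv(program, argv);  // Only returns on failure.
    std::perror("execv");
    _exit(1);  // Skip atexit handlers and stdio flushes inherited from the parent.
  }
  if (pid == -1) {
    return -1;  // fork failed.
  }
  int status;
  if (waitpid(pid, &status, 0) == -1) {
    return -1;
  }
  return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}

int main() {
  char arg0[] = "/bin/true";
  char* argv[] = {arg0, nullptr};
  return RunProgram(arg0, argv);
}
```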
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2db79ab..aae0317 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2045,6 +2045,10 @@
           } else if (reg_type.IsUninitializedTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "returning uninitialized object '"
                                               << reg_type << "'";
+          } else if (!reg_type.IsReferenceTypes()) {
+            // We really do expect a reference here.
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-object returns a non-reference type "
+                                              << reg_type;
           } else if (!return_type.IsAssignableFrom(reg_type)) {
             if (reg_type.IsUnresolvedTypes() || return_type.IsUnresolvedTypes()) {
               Fail(VERIFY_ERROR_NO_CLASS) << " can't resolve returned type '" << return_type
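The new branch pins down the ordering: a non-reference register used by return-object is a hard failure even when the declared return type is unresolvable, instead of being masked by the soft VERIFY_ERROR_NO_CLASS path. A distilled sketch with stand-in predicates (the real RegType lattice and assignability checks are much richer):

```cpp
#include <cassert>

// Stand-in for the verifier's RegType; only the predicates used above.
struct RegType {
  bool is_reference;
  bool is_unresolved;
};

enum Failure { kNone, kHardFailure, kSoftNoClass };

// Mirrors the branch order added above: reject non-references outright
// before considering the unresolved-type soft failure.
Failure CheckReturnObject(const RegType& return_type, const RegType& reg_type) {
  if (!reg_type.is_reference) {
    return kHardFailure;  // e.g. const/4 v0, 1; return-object v0
  }
  if (reg_type.is_unresolved || return_type.is_unresolved) {
    return kSoftNoClass;
  }
  return kNone;
}

int main() {
  // b/25494456: an integral value returned as an object from a method whose
  // declared return type Lwont/be/Resolvable; cannot be resolved.
  assert(CheckReturnObject({true, true}, {false, false}) == kHardFailure);
  return 0;
}
```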
diff --git a/test/008-exceptions/expected.txt b/test/008-exceptions/expected.txt
index ea59a49..083ecf7 100644
--- a/test/008-exceptions/expected.txt
+++ b/test/008-exceptions/expected.txt
@@ -9,7 +9,9 @@
 	... 2 more
 Static Init
 BadError: This is bad by convention: BadInit
+java.lang.NoClassDefFoundError: BadInit
 BadError: This is bad by convention: BadInit
 Static BadInitNoStringInit
 BadErrorNoStringInit: This is bad by convention
+java.lang.NoClassDefFoundError: BadInitNoStringInit
 BadErrorNoStringInit: This is bad by convention
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index c050204..9e3477a 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -98,8 +98,9 @@
             try {
                 BadInit.dummy = 1;
                 throw new IllegalStateException("Should not reach here.");
-            } catch (BadError e) {
+            } catch (NoClassDefFoundError e) {
                 System.out.println(e);
+                System.out.println(e.getCause());
             }
         } catch (Exception error) {
             error.printStackTrace();
@@ -120,8 +121,9 @@
             try {
                 BadInitNoStringInit.dummy = 1;
                 throw new IllegalStateException("Should not reach here.");
-            } catch (BadErrorNoStringInit e) {
+            } catch (NoClassDefFoundError e) {
                 System.out.println(e);
+                System.out.println(e.getCause());
             }
         } catch (Exception error) {
             error.printStackTrace();
diff --git a/test/546-regression-simplify-catch/expected.txt b/test/546-regression-simplify-catch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/546-regression-simplify-catch/expected.txt
diff --git a/test/546-regression-simplify-catch/info.txt b/test/546-regression-simplify-catch/info.txt
new file mode 100644
index 0000000..b146e87
--- /dev/null
+++ b/test/546-regression-simplify-catch/info.txt
@@ -0,0 +1,2 @@
+Tests simplification of catch blocks in the presence of trivially dead code
+that was not verified by the verifier.
diff --git a/test/546-regression-simplify-catch/smali/TestCase.smali b/test/546-regression-simplify-catch/smali/TestCase.smali
new file mode 100644
index 0000000..486b3b0
--- /dev/null
+++ b/test/546-regression-simplify-catch/smali/TestCase.smali
@@ -0,0 +1,104 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test simplification of an empty, dead catch block. The compiler used to
+# segfault because it expected at least one control-flow instruction (b/25494450).
+
+.method public static testCase_EmptyCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    nop
+
+.end method
+
+# Test simplification of a dead catch block with some code but no control-flow
+# instruction.
+
+.method public static testCase_NoControlFlowCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    const v1, 0x3
+    add-int v0, v0, v1
+
+.end method
+
+# Test simplification of a dead catch block with normal predecessors but
+# starting with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_InvalidLoadException()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    :catch
+    move-exception v0
+
+.end method
+
+# Test simplification of a live catch block with dead normal predecessors,
+# starting with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_TriviallyDeadPredecessor(II)I
+    .registers 3
+
+    :try_start
+    div-int v0, p0, p1
+    return v0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    # Trivially dead predecessor block.
+    add-int p0, p0, p1
+
+    :catch
+    # This verifies because only exceptional predecessors are live.
+    move-exception v0
+    const v0, 0x0
+    return v0
+
+.end method
+
diff --git a/test/546-regression-simplify-catch/src/Main.java b/test/546-regression-simplify-catch/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/546-regression-simplify-catch/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 17c1f00..a590cf1 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -46,4 +46,5 @@
 b/23300986 (2)
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
+b/25494456
 Done!
diff --git a/test/800-smali/smali/b_25494456.smali b/test/800-smali/smali/b_25494456.smali
new file mode 100644
index 0000000..0675b27
--- /dev/null
+++ b/test/800-smali/smali/b_25494456.smali
@@ -0,0 +1,14 @@
+.class public LB25494456;
+
+.super Ljava/lang/Object;
+
+# Ensure that a type mismatch (integral/float vs reference) overrides a soft failure (because of
+# an unresolvable type) in return-object.
+
+.method public static run()Lwont/be/Resolvable;
+    .registers 1
+
+    const/4 v0, 1
+    return-object v0
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f75747d..4844848 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -137,6 +137,8 @@
                 new Object[] { new Object() }, null, null));
         testCases.add(new TestCase("b/23502994 (check-cast)", "B23502994", "runCHECKCAST",
                 new Object[] { "abc" }, null, null));
+        testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {