Merge "Revert "Revert "Fast ART x86 interpreter"""
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 6eeef3f..4589736 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -143,9 +143,7 @@
 	jni/quick/arm64/calling_convention_arm64.cc \
 	linker/arm64/relative_patcher_arm64.cc \
 	optimizing/code_generator_arm64.cc \
-	optimizing/instruction_simplifier_arm.cc \
 	optimizing/instruction_simplifier_arm64.cc \
-	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4617668..3766093 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@
     true,   // kIntrinsicFloatCvt
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
@@ -99,6 +100,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -293,6 +295,7 @@
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
+    "bitCount",              // kNameCacheBitCount
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",           // kNameCacheRotateRight
@@ -447,6 +450,8 @@
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@
                                           intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicBitCount:
     case kIntrinsicNumberOfLeadingZeros:
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
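
Note on the dispatch above: the two INTRINSIC entries map Integer.bitCount(I)I and Long.bitCount(J)I onto the single kIntrinsicBitCount id (the I_I / J_I proto shorthands mean int-to-int and long-to-int), with the k32/k64 payload recording the operand width, and the new case label lets bit counting share the same fall-through handling as the other bit-manipulation intrinsics. As a reference for the semantics every backend must implement — a sketch, not ART code:

    // Population count: number of set bits, e.g. BitCount32(0x80000001) == 2.
    static int BitCount32(uint32_t x) {
      int n = 0;
      for (; x != 0; x &= x - 1) {  // Kernighan's trick: clear the lowest set bit.
        ++n;
      }
      return n;
    }
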
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577..2803623 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f265a0c..2725792 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6449,33 +6449,6 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
-void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
-  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
-                     Location::RequiresRegister());
-  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
-  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
-  LocationSummary* locations = instr->GetLocations();
-  Register res = locations->Out().AsRegister<Register>();
-  Register accumulator = locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex)
-                                      .AsRegister<Register>();
-  Register mul_left = locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex)
-                                  .AsRegister<Register>();
-  Register mul_right = locations->InAt(HMultiplyAccumulate::kInputMulRightIndex)
-                                    .AsRegister<Register>();
-
-  if (instr->GetOpKind() == HInstruction::kAdd) {
-    __ mla(res, mul_left, mul_right, accumulator);
-  } else {
-    __ mls(res, mul_left, mul_right, accumulator);
-  }
-}
-
 void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index df2126c..d45ea97 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,7 +159,6 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
-  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -198,7 +197,6 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
-  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3fdd718..3d65e9c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1952,27 +1952,21 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
-  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
-  if (instr->GetOpKind() == HInstruction::kSub &&
-      accumulator->IsConstant() &&
-      accumulator->AsConstant()->IsZero()) {
-    // Don't allocate register for Mneg instruction.
-  } else {
-    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
-                       Location::RequiresRegister());
-  }
-  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
-  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
   Register res = OutputRegister(instr);
-  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
-  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
+  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
+  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
 
   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
   // This fixup should be carried out for all multiply-accumulate instructions:
@@ -1992,18 +1986,10 @@
   }
 
   if (instr->GetOpKind() == HInstruction::kAdd) {
-    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
     __ Madd(res, mul_left, mul_right, accumulator);
   } else {
     DCHECK(instr->GetOpKind() == HInstruction::kSub);
-    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
-    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) {
-      __ Mneg(res, mul_left, mul_right);
-    } else {
-      Register accumulator = InputRegisterAt(instr,
-                                             HMultiplyAccumulate::kInputAccumulatorIndex);
-      __ Msub(res, mul_left, mul_right, accumulator);
-    }
+    __ Msub(res, mul_left, mul_right, accumulator);
   }
 }
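
For reference, the A64 semantics this visitor targets (per the ARMv8 ISA; a sketch, not ART code):

    //   MADD res, a, b, acc   =>   res = acc + (a * b)
    //   MSUB res, a, b, acc   =>   res = acc - (a * b)
    //   MNEG res, a, b        =>   res = -(a * b)     (MSUB with the zero register)

The dropped Mneg path was the zero-accumulator special case; after this revert, a constant-zero accumulator is simply materialized in a register and handled by the generic Msub path.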
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 98303f6..8eb9fcc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -195,7 +195,6 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
-  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -246,7 +245,6 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
-  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a99..feb8b20 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -133,8 +133,9 @@
 
   const uint32_t dominators[] = {
       kInvalidBlockId,
-      0,
-      kInvalidBlockId
+      3,
+      kInvalidBlockId,
+      0
   };
 
   TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71..d530564 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
@@ -199,7 +199,7 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
   ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 3c88ba7..32c3a92 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -426,12 +426,6 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
-#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
-  void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
-    StartAttributeStream("kind") << instruction->GetOpKind();
-  }
-#endif
-
 #ifdef ART_ENABLE_CODEGEN_arm64
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
@@ -439,6 +433,10 @@
       StartAttributeStream("shift") << instruction->GetShiftAmount();
     }
   }
+
+  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
 #endif
 
   bool IsPass(const char* name) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 20c4f1f..2e79df1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -419,7 +419,10 @@
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
     VLOG(compiler) << "Method " << PrettyMethod(method)
-                   << " is too big to inline";
+                   << " is too big to inline: "
+                   << code_item->insns_size_in_code_units_
+                   << " > "
+                   << inline_max_code_units;
     return false;
   }
 
@@ -639,9 +642,12 @@
 
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsLoopHeader()) {
+
+    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      // Don't inline methods with irreducible loops, as they could prevent
+      // some optimizations from running.
       VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                     << " could not be inlined because it contains a loop";
+                     << " could not be inlined because it contains an irreducible loop";
       return false;
     }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
deleted file mode 100644
index db1f9a7..0000000
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "instruction_simplifier_arm.h"
-#include "instruction_simplifier_shared.h"
-
-namespace art {
-namespace arm {
-
-void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
-  if (TryCombineMultiplyAccumulate(instruction, kArm)) {
-    RecordSimplification();
-  }
-}
-
-}  // namespace arm
-}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
deleted file mode 100644
index 379b95d..0000000
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
-#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
-
-#include "nodes.h"
-#include "optimization.h"
-
-namespace art {
-namespace arm {
-
-class InstructionSimplifierArmVisitor : public HGraphVisitor {
- public:
-  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph), stats_(stats) {}
-
- private:
-  void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
-  }
-
-  void VisitMul(HMul* instruction) OVERRIDE;
-
-  OptimizingCompilerStats* stats_;
-};
-
-
-class InstructionSimplifierArm : public HOptimization {
- public:
-  InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, "instruction_simplifier_arm", stats) {}
-
-  void Run() OVERRIDE {
-    InstructionSimplifierArmVisitor visitor(graph_, stats_);
-    visitor.VisitReversePostOrder();
-  }
-};
-
-}  // namespace arm
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 83126a5..4bcfc54 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -17,7 +17,6 @@
 #include "instruction_simplifier_arm64.h"
 
 #include "common_arm64.h"
-#include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
 
 namespace art {
@@ -180,6 +179,67 @@
   return true;
 }
 
+bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
+    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -195,8 +255,75 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
-  if (TryCombineMultiplyAccumulate(instruction, kArm64)) {
-    RecordSimplification();
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntOrLongType(type)) {
+    return;
+  }
+
+  HInstruction* use = instruction->HasNonEnvironmentUses()
+      ? instruction->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+    // Replace code looking like
+    //    MUL tmp, x, y
+    //    SUB dst, acc, tmp
+    // with
+    //    MULSUB dst, acc, x, y
+    // Note that we do not want to (unconditionally) perform the merge when the
+    // multiplication has multiple uses and it can be merged in all of them.
+    // Multiple uses could happen on the same control-flow path, and we would
+    // then increase the amount of work. In the future we could try to evaluate
+    // whether all uses are on different control-flow paths (using dominance and
+    // reverse-dominance information) and only perform the merge when they are.
+    HInstruction* accumulator = nullptr;
+    HBinaryOperation* binop = use->AsBinaryOperation();
+    HInstruction* binop_left = binop->GetLeft();
+    HInstruction* binop_right = binop->GetRight();
+    // Be careful after GVN. This should not happen since the `HMul` has only
+    // one use.
+    DCHECK_NE(binop_left, binop_right);
+    if (binop_right == instruction) {
+      accumulator = binop_left;
+    } else if (use->IsAdd()) {
+      DCHECK_EQ(binop_left, instruction);
+      accumulator = binop_right;
+    }
+
+    if (accumulator != nullptr) {
+      HArm64MultiplyAccumulate* mulacc =
+          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+                                                                binop->GetKind(),
+                                                                accumulator,
+                                                                instruction->GetLeft(),
+                                                                instruction->GetRight());
+
+      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+      DCHECK(!instruction->HasUses());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (instruction->GetLeft() == instruction->GetRight()) {
+    return;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
+    return;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
+    return;
   }
 }
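
The restored VisitMul recognizes two families of shapes; a sketch of the rewrites, with acc, x, y, a, b standing for arbitrary int/long values:

    // Single-use multiply feeding an add or sub:
    //   tmp = Mul(x, y); dst = Sub(acc, tmp)
    //       -->  dst = Arm64MultiplyAccumulate(kSub, acc, x, y)         // MSUB
    // Patterns handled by TrySimpleMultiplyAccumulatePatterns:
    //   a * (b + 1)   ==  (a * b) + a  -->  Arm64MultiplyAccumulate(kAdd, a, a, b)
    //   a * (b - -1)  ==  (a * b) + a  -->  Arm64MultiplyAccumulate(kAdd, a, a, b)
    //   a * (1 - b)   ==  a - (a * b)  -->  Arm64MultiplyAccumulate(kSub, a, a, b)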
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 37a34c0..b7f490b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,6 +51,10 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
+  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                           HBinaryOperation* input_binop,
+                                           HInstruction* input_other);
+
   // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
deleted file mode 100644
index 45d196f..0000000
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "instruction_simplifier_shared.h"
-
-namespace art {
-
-namespace {
-
-bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
-                                         HBinaryOperation* input_binop,
-                                         HInstruction* input_other) {
-  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
-  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
-  DCHECK_NE(input_binop, input_other);
-  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
-    return false;
-  }
-
-  // Try to interpret patterns like
-  //    a * (b <+/-> 1)
-  // as
-  //    (a * b) <+/-> a
-  HInstruction* input_a = input_other;
-  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
-  HInstruction::InstructionKind op_kind;
-
-  if (input_binop->IsAdd()) {
-    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
-      // Interpret
-      //    a * (b + 1)
-      // as
-      //    (a * b) + a
-      input_b = input_binop->GetLeastConstantLeft();
-      op_kind = HInstruction::kAdd;
-    }
-  } else {
-    DCHECK(input_binop->IsSub());
-    if (input_binop->GetRight()->IsConstant() &&
-        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
-      // Interpret
-      //    a * (b - (-1))
-      // as
-      //    a + (a * b)
-      input_b = input_binop->GetLeft();
-      op_kind = HInstruction::kAdd;
-    } else if (input_binop->GetLeft()->IsConstant() &&
-               input_binop->GetLeft()->AsConstant()->IsOne()) {
-      // Interpret
-      //    a * (1 - b)
-      // as
-      //    a - (a * b)
-      input_b = input_binop->GetRight();
-      op_kind = HInstruction::kSub;
-    }
-  }
-
-  if (input_b == nullptr) {
-    // We did not find a pattern we can optimize.
-    return false;
-  }
-
-  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-  HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
-      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
-
-  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
-  input_binop->GetBlock()->RemoveInstruction(input_binop);
-
-  return true;
-}
-
-}  // namespace
-
-bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
-  Primitive::Type type = mul->GetType();
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      if (type != Primitive::kPrimInt) {
-        return false;
-      }
-      break;
-    case kArm64:
-      if (!Primitive::IsIntOrLongType(type)) {
-        return false;
-      }
-      break;
-    default:
-      return false;
-  }
-
-  HInstruction* use = mul->HasNonEnvironmentUses()
-      ? mul->GetUses().GetFirst()->GetUser()
-      : nullptr;
-
-  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-
-  if (mul->HasOnlyOneNonEnvironmentUse()) {
-    if (use->IsAdd() || use->IsSub()) {
-      // Replace code looking like
-      //    MUL tmp, x, y
-      //    SUB dst, acc, tmp
-      // with
-      //    MULSUB dst, acc, x, y
-      // Note that we do not want to (unconditionally) perform the merge when the
-      // multiplication has multiple uses and it can be merged in all of them.
-      // Multiple uses could happen on the same control-flow path, and we would
-      // then increase the amount of work. In the future we could try to evaluate
-      // whether all uses are on different control-flow paths (using dominance and
-      // reverse-dominance information) and only perform the merge when they are.
-      HInstruction* accumulator = nullptr;
-      HBinaryOperation* binop = use->AsBinaryOperation();
-      HInstruction* binop_left = binop->GetLeft();
-      HInstruction* binop_right = binop->GetRight();
-      // Be careful after GVN. This should not happen since the `HMul` has only
-      // one use.
-      DCHECK_NE(binop_left, binop_right);
-      if (binop_right == mul) {
-        accumulator = binop_left;
-      } else if (use->IsAdd()) {
-        DCHECK_EQ(binop_left, mul);
-        accumulator = binop_right;
-      }
-
-      if (accumulator != nullptr) {
-        HMultiplyAccumulate* mulacc =
-            new (arena) HMultiplyAccumulate(type,
-                                            binop->GetKind(),
-                                            accumulator,
-                                            mul->GetLeft(),
-                                            mul->GetRight());
-
-        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-        DCHECK(!mul->HasUses());
-        mul->GetBlock()->RemoveInstruction(mul);
-        return true;
-      }
-    } else if (use->IsNeg() && isa != kArm) {
-      HMultiplyAccumulate* mulacc =
-          new (arena) HMultiplyAccumulate(type,
-                                          HInstruction::kSub,
-                                          mul->GetBlock()->GetGraph()->GetConstant(type, 0),
-                                          mul->GetLeft(),
-                                          mul->GetRight());
-
-      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
-      DCHECK(!mul->HasUses());
-      mul->GetBlock()->RemoveInstruction(mul);
-      return true;
-    }
-  }
-
-  // Use multiply accumulate instruction for a few simple patterns.
-  // We prefer not applying the following transformations if the left and
-  // right inputs perform the same operation.
-  // We rely on GVN having squashed the inputs if appropriate. However the
-  // results are still correct even if that did not happen.
-  if (mul->GetLeft() == mul->GetRight()) {
-    return false;
-  }
-
-  HInstruction* left = mul->GetLeft();
-  HInstruction* right = mul->GetRight();
-  if ((right->IsAdd() || right->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
-    return true;
-  }
-  if ((left->IsAdd() || left->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
-    return true;
-  }
-  return false;
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
deleted file mode 100644
index 9832ecc..0000000
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
-#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
-
-#include "nodes.h"
-
-namespace art {
-
-bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3..5caf077 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -176,6 +176,16 @@
       }
 
     // Misc data processing.
+    case kIntrinsicBitCount:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerBitCount;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongBitCount;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28..e72f927 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1577,10 +1577,12 @@
 void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..c5688a3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1447,8 +1447,10 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
 UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546..ea38034 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -28,12 +28,14 @@
   V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
   V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a2..81112b1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7..ac969e3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1724,6 +1724,9 @@
 void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9..e48bed5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ popcntl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ popcntl(out, Address(ESP, src.GetStackIndex()));
+    }
+    return;
+  }
+
+  // The 64-bit case needs to worry about both parts of the register.
+  DCHECK(src.IsRegisterPair());
+  Register src_lo = src.AsRegisterPairLow<Register>();
+  Register src_hi = src.AsRegisterPairHigh<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  __ popcntl(temp, src_lo);
+  __ popcntl(out, src_hi);
+  __ addl(out, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
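
On x86-32 a long lives in a register pair, which is why GenBitCount above splits the 64-bit count into two popcntl instructions plus an addl. A standalone sketch of why the plain 32-bit add suffices (assuming a compiler providing __builtin_popcount; not part of the ART code above):

    #include <cstdint>

    static int BitCount64(uint32_t lo, uint32_t hi) {
      // Each half contributes at most 32, so the sum is at most 64 and can
      // never overflow the 32-bit result register.
      return __builtin_popcount(lo) + __builtin_popcount(hi);
    }
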
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d..23a628f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateBitCountLocations(
+    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+    // Do nothing if there is no popcnt support. This results in generating
+    // a call for the intrinsic rather than direct code.
+    return;
+  }
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    value = is_long
+        ? POPCOUNT(static_cast<uint64_t>(value))
+        : POPCOUNT(static_cast<uint32_t>(value));
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ popcntq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ popcntl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+  CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
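
Both the x86 and x86-64 generators share a constant-folding fast path: when the input is a compile-time constant, POPCOUNT is evaluated by the compiler and only a move (or an xorl for zero) is emitted, so no popcnt instruction is needed at all. Illustrative examples (assumed values, not from this change):

    //   Integer.bitCount(0)                  -->  xorl out, out
    //   Long.bitCount(0x00F000F000F000F0L)   -->  movl $16, out
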
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2eabadf..adf8734 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -288,9 +288,10 @@
 
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
-  // loop.
+  // loop. Also, don't allow the entry block to be a pre header: this simplifies
+  // inlining of this graph.
   size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
-  if (number_of_incomings != 1) {
+  if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
     HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -1837,6 +1838,7 @@
     DCHECK(GetBlocks()[0]->IsEntryBlock());
     DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
+    DCHECK(!body->IsInLoop());
     HInstruction* last = body->GetLastInstruction();
 
     invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
@@ -1895,7 +1897,7 @@
     // Update the meta information surrounding blocks:
     // (1) the graph they are now in,
     // (2) the reverse post order of that graph,
-    // (3) the potential loop information they are now in,
+    // (3) their potential loop information, inner and outer,
     // (4) try block membership.
     // Note that we do not need to update catch phi inputs because they
     // correspond to the register file of the outer method which the inlinee
@@ -1924,15 +1926,24 @@
     for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
       HBasicBlock* current = it.Current();
       if (current != exit_block_ && current != entry_block_ && current != first) {
-        DCHECK(!current->IsInLoop());
         DCHECK(current->GetTryCatchInformation() == nullptr);
         DCHECK(current->GetGraph() == this);
         current->SetGraph(outer_graph);
         outer_graph->AddBlock(current);
         outer_graph->reverse_post_order_[++index_of_at] = current;
-        if (loop_info != nullptr) {
+        if (!current->IsInLoop()) {
           current->SetLoopInformation(loop_info);
-          for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+        } else if (current->IsLoopHeader()) {
+          // Clear the information of which blocks are contained in that loop. Since the
+          // information is stored as a bit vector based on block ids, we have to update
+          // it, as those block ids were specific to the callee graph and we are now adding
+          // these blocks to the caller graph.
+          current->GetLoopInformation()->ClearAllBlocks();
+        }
+        if (current->IsInLoop()) {
+          for (HLoopInformationOutwardIterator loop_it(*current);
+               !loop_it.Done();
+               loop_it.Advance()) {
             loop_it.Current()->Add(current);
           }
         }
@@ -1945,7 +1956,9 @@
     outer_graph->AddBlock(to);
     outer_graph->reverse_post_order_[++index_of_at] = to;
     if (loop_info != nullptr) {
-      to->SetLoopInformation(loop_info);
+      if (!to->IsInLoop()) {
+        to->SetLoopInformation(loop_info);
+      }
       for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
         loop_it.Current()->Add(to);
       }
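
Background for the ClearAllBlocks() call above: HLoopInformation tracks its blocks as a bit vector indexed by block id, and callee block ids become meaningless once the blocks are renumbered into the caller graph. A sketch of the staleness being avoided (illustrative, not ART code):

    // Callee loop before inlining:  blocks_ bits = { 3, 4 }   // callee ids
    // After inlining, those blocks may be caller blocks 17 and 18, so bits
    // 3 and 4 would name unrelated caller blocks. Hence:
    //   current->GetLoopInformation()->ClearAllBlocks();  // drop stale ids
    //   loop_it.Current()->Add(current);                  // re-add caller ids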
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 57fa558..5246fd1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -689,6 +689,10 @@
   void Add(HBasicBlock* block);
   void Remove(HBasicBlock* block);
 
+  void ClearAllBlocks() {
+    blocks_.ClearAllBits();
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -1226,16 +1230,6 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
-/*
- * Instructions, shared across several (not all) architectures.
- */
-#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64)
-#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
-#else
-#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
-  M(MultiplyAccumulate, Instruction)
-#endif
-
 #ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
 #else
@@ -1248,7 +1242,8 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64IntermediateAddress, Instruction)                              \
+  M(Arm64MultiplyAccumulate, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1268,7 +1263,6 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                               \
-  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
@@ -5728,9 +5722,6 @@
 
 }  // namespace art
 
-#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
-#include "nodes_shared.h"
-#endif
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "nodes_arm.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 173852a..445cdab 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,6 +118,40 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
+class HArm64MultiplyAccumulate : public HExpression<3> {
+ public:
+  HArm64MultiplyAccumulate(Primitive::Type type,
+                           InstructionKind op,
+                           HInstruction* accumulator,
+                           HInstruction* mul_left,
+                           HInstruction* mul_right,
+                           uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
deleted file mode 100644
index b04b622..0000000
--- a/compiler/optimizing/nodes_shared.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
-#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
-
-namespace art {
-
-class HMultiplyAccumulate : public HExpression<3> {
- public:
-  HMultiplyAccumulate(Primitive::Type type,
-                      InstructionKind op,
-                      HInstruction* accumulator,
-                      HInstruction* mul_left,
-                      HInstruction* mul_right,
-                      uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
-    SetRawInputAt(kInputAccumulatorIndex, accumulator);
-    SetRawInputAt(kInputMulLeftIndex, mul_left);
-    SetRawInputAt(kInputMulRightIndex, mul_right);
-  }
-
-  static constexpr int kInputAccumulatorIndex = 0;
-  static constexpr int kInputMulLeftIndex = 1;
-  static constexpr int kInputMulRightIndex = 2;
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
-  }
-
-  InstructionKind GetOpKind() const { return op_kind_; }
-
-  DECLARE_INSTRUCTION(MultiplyAccumulate);
-
- private:
-  // Indicates if this is a MADD or MSUB.
-  const InstructionKind op_kind_;
-
-  DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 4da48bd..fffd005 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -62,7 +62,6 @@
 #include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
-#include "instruction_simplifier_arm.h"
 #include "intrinsics.h"
 #include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
@@ -446,11 +445,8 @@
     case kThumb2:
     case kArm: {
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
-      arm::InstructionSimplifierArm* simplifier =
-          new (arena) arm::InstructionSimplifierArm(graph, stats);
       HOptimization* arm_optimizations[] = {
-        fixups,
-        simplifier
+        fixups
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a966b62..d77639d 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1734,6 +1734,12 @@
   }
 }
 
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
                                              HBasicBlock* from,
                                              HBasicBlock* to) const {
@@ -1750,7 +1756,19 @@
     // Interval was not split.
     return;
   }
-  DCHECK(destination != nullptr && source != nullptr);
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (destination == nullptr) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    return;
+  }
 
   if (!destination->HasRegister()) {
     // Values are eagerly spilled. Spill slot already contains appropriate value.
@@ -1761,13 +1779,13 @@
   // we need to put the moves at the entry of `to`.
   if (from->GetNormalSuccessors().size() == 1) {
     InsertParallelMoveAtExitOf(from,
-                               interval->GetParent()->GetDefinedBy(),
+                               defined_by,
                                source->ToLocation(),
                                destination->ToLocation());
   } else {
     DCHECK_EQ(to->GetPredecessors().size(), 1u);
     InsertParallelMoveAtEntryOf(to,
-                                interval->GetParent()->GetDefinedBy(),
+                                defined_by,
                                 source->ToLocation(),
                                 destination->ToLocation());
   }
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c..7138a46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
   EmitOperand(dst, src);
 }
 
+void X86Assembler::popcntl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
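
Per the Intel SDM, POPCNT is encoded as F3 0F B8 /r, with the F3 prefix mandatory — exactly the byte sequence the two emitters above produce. Expected bytes for a couple of forms (a sketch, not output from this change's tests):

    popcntl %ecx, %eax        // dst = EAX, src = ECX   =>  F3 0F B8 C1
    popcntl 12(%esp), %eax    // dst = EAX, src in mem  =>  F3 0F B8 44 24 0C
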
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c..759a41e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+
   void bsfl(Register dst, Register src);
   void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void popcntl(Register dst, Register src);
+  void popcntl(Register dst, const Address& src);
+
   void rorl(Register reg, const Immediate& imm);
   void rorl(Register operand, Register shifter);
   void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c..0fd0982 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@
   DriverStr(expected, "bsrl_address");
 }
 
+TEST_F(AssemblerX86Test, Popcntl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
 // Rorl only allows CL as the shift count.
 std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
   std::ostringstream str;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db07267..10f5a00 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xB8);
+  EmitOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
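
The x86-64 forms insert a REX prefix between the mandatory F3 prefix and the 0F B8 escape: EmitOptionalRex32 emits one only when dst or src is an extended register, while popcntq always emits REX.W. A minimal standalone sketch of that byte layout (hypothetical helper, not the ART assembler):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Register-register POPCNT encoding: F3 [REX] 0F B8 ModRM.
    std::vector<uint8_t> EncodePopcntRR(int dst, int src, bool is64bit) {
      std::vector<uint8_t> out;
      out.push_back(0xF3);                    // Mandatory prefix selecting POPCNT.
      uint8_t rex = is64bit ? 0x48 : 0x40;    // REX.W for the 64-bit form.
      if (dst >= 8) rex |= 0x04;              // REX.R extends the reg (dst) field.
      if (src >= 8) rex |= 0x01;              // REX.B extends the r/m (src) field.
      if (rex != 0x40) out.push_back(rex);    // Low 32-bit registers need no REX.
      out.push_back(0x0F);
      out.push_back(0xB8);
      out.push_back(0xC0 | ((dst & 7) << 3) | (src & 7));  // mod=11, reg=dst, rm=src.
      return out;
    }

    int main() {
      // popcntl %ecx, %eax -> F3 0F B8 C1
      for (uint8_t b : EncodePopcntRR(0, 1, false)) std::printf("%02X ", static_cast<unsigned>(b));
      std::printf("\n");
      // popcntq %r10, %rdi -> F3 49 0F B8 FA
      for (uint8_t b : EncodePopcntRR(7, 10, true)) std::printf("%02X ", static_cast<unsigned>(b));
      std::printf("\n");
      return 0;
    }
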
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..6f0847e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void popcntl(CpuRegister dst, CpuRegister src);
+  void popcntl(CpuRegister dst, const Address& src);
+  void popcntq(CpuRegister dst, CpuRegister src);
+  void popcntq(CpuRegister dst, const Address& src);
+
   void rorl(CpuRegister reg, const Immediate& imm);
   void rorl(CpuRegister operand, CpuRegister shifter);
   void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..8a87fca 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@
   DriverStr(expected, "bsrq_address");
 }
 
+TEST_F(AssemblerX86_64Test, Popcntl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+    "popcntl 0xc(%R10,%RBX,4), %edi\n"
+    "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+    "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+    "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "popcntq_address");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index e495a27..918a01b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -2091,6 +2091,10 @@
     // Disable libsigchain. We don't need it during compilation and it prevents us
     // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
     raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+    // Disable HSpace compaction to save virtual address space for the heap.
+    // We only need to disable HSpace compaction for OOM because the background
+    // collector is the same as the foreground collector by default for dex2oat.
+    raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
 
     if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
       LOG(ERROR) << "Failed to parse runtime options";
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..1f74c93 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -938,6 +938,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xB8:
+        opcode1 = "popcnt";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
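
On the consumer side, the new 0xB8 case lets the two-byte-escape decoder turn F3 0F B8 back into a popcnt mnemonic, with has_modrm/load covering both the register and memory forms. A reduced, self-contained sketch of the register-direct decode path (hypothetical code, AT&T operand order as in the test expectations above):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static const char* kReg32[8] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};

    // Recognize F3 0F B8 /r in register-direct form and print it AT&T-style.
    bool DecodePopcnt(const uint8_t* insn, char* out, std::size_t out_size) {
      if (insn[0] != 0xF3 || insn[1] != 0x0F || insn[2] != 0xB8) return false;
      const uint8_t modrm = insn[3];
      if ((modrm >> 6) != 3) return false;  // Memory forms elided in this sketch.
      const int dst = (modrm >> 3) & 7;     // reg field = destination.
      const int src = modrm & 7;            // r/m field = source.
      std::snprintf(out, out_size, "popcnt %%%s, %%%s", kReg32[src], kReg32[dst]);
      return true;
    }

    int main() {
      const uint8_t bytes[] = {0xF3, 0x0F, 0xB8, 0xC1};
      char buf[32];
      if (DecodePopcnt(bytes, buf, sizeof(buf))) std::puts(buf);  // popcnt %ecx, %eax
      return 0;
    }
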
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 42f5df4..b97a8db 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -50,6 +50,10 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_with_popcnt[] = {
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -69,6 +73,11 @@
                                                arraysize(x86_variants_prefer_locked_add_sync),
                                                variant);
 
+  bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+                                       arraysize(x86_variants_with_popcnt),
+                                       variant);
+
+  // Verify that variant is known.
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -77,10 +86,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -93,12 +102,15 @@
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvx2Bitfield) != 0;
   bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
+  bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                                has_AVX, has_AVX2, prefers_locked_add);
+                                            has_AVX, has_AVX2, prefers_locked_add,
+                                            has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                             has_AVX, has_AVX2, prefers_locked_add);
+                                         has_AVX, has_AVX2, prefers_locked_add,
+                                         has_POPCNT);
   }
 }
 
@@ -138,12 +150,18 @@
   // No #define for memory synchronization preference.
   const bool prefers_locked_add = false;
 
+#ifndef __POPCNT__
+  const bool has_POPCNT = false;
+#else
+  const bool has_POPCNT = true;
+#endif
+
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -158,6 +176,7 @@
   bool has_AVX2 = false;
   // No cpuinfo for memory synchronization preference.
   const bool prefers_locked_add = false;
+  bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -183,6 +202,9 @@
           if (line.find("avx2") != std::string::npos) {
             has_AVX2 = true;
           }
+          if (line.find("popcnt") != std::string::npos) {
+            has_POPCNT = true;
+          }
         } else if (line.find("processor") != std::string::npos &&
             line.find(": 1") != std::string::npos) {
           smp = true;
@@ -195,10 +217,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
@@ -223,7 +245,8 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
+      (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -233,7 +256,8 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
+      (has_POPCNT_ ? kPopCntBitfield : 0);
 }
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -273,6 +297,11 @@
   } else {
     result += ",-lock_add";
   }
+  if (has_POPCNT_) {
+    result += ",popcnt";
+  } else {
+    result += ",-popcnt";
+  }
   return result;
 }
 
@@ -285,6 +314,7 @@
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
   bool prefers_locked_add = prefers_locked_add_;
+  bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -311,6 +341,10 @@
       prefers_locked_add = true;
     } else if (feature == "-lock_add") {
       prefers_locked_add = false;
+    } else if (feature == "popcnt") {
+      has_POPCNT = true;
+    } else if (feature == "-popcnt") {
+      has_POPCNT = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -318,10 +352,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                                has_AVX2, prefers_locked_add);
+                                            has_AVX2, prefers_locked_add, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add);
+                                         has_AVX2, prefers_locked_add, has_POPCNT);
   }
 }
 
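Of the construction paths above, the /proc/cpuinfo one is the only dynamic probe: it scans the CPU flags line for the popcnt token. Reduced to just the new feature, the probe looks roughly like this (simplified sketch; the real code also parses ssse3, the sse4.x/avx flags, and the processor count for smp):

    #include <fstream>
    #include <string>

    // Any "flags" line in /proc/cpuinfo containing "popcnt" enables the feature.
    bool CpuHasPopcnt() {
      std::ifstream in("/proc/cpuinfo");
      std::string line;
      while (std::getline(in, line)) {
        if (line.find("flags") != std::string::npos &&
            line.find("popcnt") != std::string::npos) {
          return true;
        }
      }
      return false;
    }
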
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2b845f8..1819654 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -62,6 +62,8 @@
 
   bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
 
+  bool HasPopCnt() const { return has_POPCNT_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -75,10 +77,17 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
-      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add) {
+                            bool has_AVX, bool has_AVX2,
+                            bool prefers_locked_add,
+                            bool has_POPCNT)
+      : InstructionSetFeatures(smp),
+        has_SSSE3_(has_SSSE3),
+        has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2),
+        has_AVX_(has_AVX),
+        has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add),
+        has_POPCNT_(has_POPCNT) {
   }
 
  private:
@@ -91,6 +100,7 @@
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
     kPrefersLockedAdd = 64,
+    kPopCntBitfield = 128,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -99,6 +109,7 @@
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
   const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
+  const bool has_POPCNT_;  // x86 population count.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index e8d01e6..a062c12 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,7 +40,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,7 +60,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_features->AsBitmap(), 207U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
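
The bitmap expectations change from 79U to 207U because the silvermont feature set gains the new bit: 79 = 1 (smp) + 2 (ssse3) + 4 (sse4.1) + 8 (sse4.2) + 64 (lock_add), and adding kPopCntBitfield = 128 gives 207. A compile-time sketch of that arithmetic (the names of the low bits are assumed from the header's pattern):

    #include <cstdint>

    constexpr uint32_t kSmpBitfield      = 1;
    constexpr uint32_t kSsse3Bitfield    = 2;
    constexpr uint32_t kSse4_1Bitfield   = 4;
    constexpr uint32_t kSse4_2Bitfield   = 8;
    constexpr uint32_t kPrefersLockedAdd = 64;
    constexpr uint32_t kPopCntBitfield   = 128;

    // "smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt" for silvermont:
    static_assert((kSmpBitfield | kSsse3Bitfield | kSse4_1Bitfield |
                   kSse4_2Bitfield | kPrefersLockedAdd | kPopCntBitfield) == 207u,
                  "old bitmap 79 plus the new popcnt bit 128");
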
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index b8000d0..aba7234 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,9 +74,10 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
+                               bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add) {
+                                  has_AVX2, prefers_locked_add, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 4562c64..78aeacf 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6e11cf8..a0f875d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -230,11 +230,11 @@
     Dbg::PostException(exception_object);
   }
 
-  // We only care about how many backward branches were executed in the Jit.
-  void BackwardBranch(Thread* /*thread*/, ArtMethod* method, int32_t dex_pc_offset)
+  // We only care about branches in the Jit.
+  void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected backward branch event in debugger " << PrettyMethod(method)
-               << " " << dex_pc_offset;
+    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+               << " " << dex_pc << ", " << dex_pc_offset;
   }
 
   // We only care about invokes in the Jit.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 7d55e8c..c57b1bb 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -78,7 +78,7 @@
       have_field_read_listeners_(false),
       have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
-      have_backward_branch_listeners_(false),
+      have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -431,11 +431,11 @@
                            method_unwind_listeners_,
                            listener,
                            &have_method_unwind_listeners_);
-  PotentiallyAddListenerTo(kBackwardBranch,
+  PotentiallyAddListenerTo(kBranch,
                            events,
-                           backward_branch_listeners_,
+                           branch_listeners_,
                            listener,
-                           &have_backward_branch_listeners_);
+                           &have_branch_listeners_);
   PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
                            events,
                            invoke_virtual_or_interface_listeners_,
@@ -508,11 +508,11 @@
                                 method_unwind_listeners_,
                                 listener,
                                 &have_method_unwind_listeners_);
-  PotentiallyRemoveListenerFrom(kBackwardBranch,
+  PotentiallyRemoveListenerFrom(kBranch,
                                 events,
-                                backward_branch_listeners_,
+                                branch_listeners_,
                                 listener,
-                                &have_backward_branch_listeners_);
+                                &have_branch_listeners_);
   PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
                                 events,
                                 invoke_virtual_or_interface_listeners_,
@@ -917,11 +917,13 @@
   }
 }
 
-void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
-                                         int32_t offset) const {
-  for (InstrumentationListener* listener : backward_branch_listeners_) {
+void Instrumentation::BranchImpl(Thread* thread,
+                                 ArtMethod* method,
+                                 uint32_t dex_pc,
+                                 int32_t offset) const {
+  for (InstrumentationListener* listener : branch_listeners_) {
     if (listener != nullptr) {
-      listener->BackwardBranch(thread, method, offset);
+      listener->Branch(thread, method, dex_pc, offset);
     }
   }
 }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index b29245f..56aeefc 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -94,8 +94,11 @@
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
-  // Call-back for when we get a backward branch.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  // Call-back for when we execute a branch.
+  virtual void Branch(Thread* thread,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
@@ -122,7 +125,7 @@
     kFieldRead = 0x10,
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
-    kBackwardBranch = 0x80,
+    kBranch = 0x80,
     kInvokeVirtualOrInterface = 0x100,
   };
 
@@ -276,8 +279,8 @@
     return have_exception_caught_listeners_;
   }
 
-  bool HasBackwardBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_backward_branch_listeners_;
+  bool HasBranchListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_branch_listeners_;
   }
 
   bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -324,11 +327,11 @@
     }
   }
 
-  // Inform listeners that a backward branch has been taken (only supported by the interpreter).
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t offset) const
+  // Inform listeners that a branch has been taken (only supported by the interpreter).
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (UNLIKELY(HasBackwardBranchListeners())) {
-      BackwardBranchImpl(thread, method, offset);
+    if (UNLIKELY(HasBranchListeners())) {
+      BranchImpl(thread, method, dex_pc, offset);
     }
   }
 
@@ -442,7 +445,7 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
+  void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
                                     mirror::Object* this_object,
@@ -513,8 +516,8 @@
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
-  bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  // Do we have any branch listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -537,7 +540,7 @@
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
-  std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
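
The renamed have_branch_listeners_ flag keeps the same shape as the other short-cuts: the hot path tests one cached boolean before touching the listener list, so an uninstrumented interpreter pays almost nothing per branch. A self-contained sketch of the pattern (simplified stand-ins, not the ART classes):

    #include <cstdint>
    #include <list>

    struct Listener {
      virtual ~Listener() {}
      virtual void Branch(uint32_t dex_pc, int32_t dex_pc_offset) = 0;
    };

    class Instrumentation {
     public:
      void AddBranchListener(Listener* l) {
        branch_listeners_.push_back(l);
        have_branch_listeners_ = true;
      }
      // Hot path: called on every branch the interpreter executes.
      void Branch(uint32_t dex_pc, int32_t dex_pc_offset) const {
        if (__builtin_expect(have_branch_listeners_, false)) {  // UNLIKELY() in ART.
          BranchImpl(dex_pc, dex_pc_offset);
        }
      }
     private:
      void BranchImpl(uint32_t dex_pc, int32_t dex_pc_offset) const {
        for (Listener* l : branch_listeners_) l->Branch(dex_pc, dex_pc_offset);
      }
      std::list<Listener*> branch_listeners_;
      bool have_branch_listeners_ = false;
    };
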
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index e4688a2..56e3bc5 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -37,7 +37,7 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_exception_caught_event(false), received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
@@ -100,11 +100,12 @@
     received_exception_caught_event = true;
   }
 
-  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
-                      ArtMethod* method ATTRIBUTE_UNUSED,
-                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+  void Branch(Thread* thread ATTRIBUTE_UNUSED,
+              ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    received_backward_branch_event = true;
+    received_branch_event = true;
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
@@ -124,7 +125,7 @@
     received_field_read_event = false;
     received_field_written_event = false;
     received_exception_caught_event = false;
-    received_backward_branch_event = false;
+    received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
   }
 
@@ -135,7 +136,7 @@
   bool received_field_read_event;
   bool received_field_written_event;
   bool received_exception_caught_event;
-  bool received_backward_branch_event;
+  bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
 
  private:
@@ -305,8 +306,8 @@
         return instr->HasFieldWriteListeners();
       case instrumentation::Instrumentation::kExceptionCaught:
         return instr->HasExceptionCaughtListeners();
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kBranch:
+        return instr->HasBranchListeners();
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
@@ -349,8 +350,8 @@
         self->ClearException();
         break;
       }
-      case instrumentation::Instrumentation::kBackwardBranch:
-        instr->BackwardBranch(self, method, dex_pc);
+      case instrumentation::Instrumentation::kBranch:
+        instr->Branch(self, method, dex_pc, -1);
         break;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
@@ -378,8 +379,8 @@
         return listener.received_field_written_event;
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
-      case instrumentation::Instrumentation::kBackwardBranch:
-        return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kBranch:
+        return listener.received_branch_event;
       case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
         return listener.received_invoke_virtual_or_interface_event;
       default:
@@ -441,8 +442,8 @@
   TestEvent(instrumentation::Instrumentation::kExceptionCaught);
 }
 
-TEST_F(InstrumentationTest, BackwardBranchEvent) {
-  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+TEST_F(InstrumentationTest, BranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBranch);
 }
 
 TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9766299..ca00621 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -63,10 +63,10 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -633,8 +633,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO) {
     int8_t offset = inst->VRegA_10t(inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -646,8 +646,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_16) {
     int16_t offset = inst->VRegA_20t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -659,8 +659,8 @@
 
   HANDLE_INSTRUCTION_START(GOTO_32) {
     int32_t offset = inst->VRegA_30t();
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -672,8 +672,8 @@
 
   HANDLE_INSTRUCTION_START(PACKED_SWITCH) {
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -685,8 +685,8 @@
 
   HANDLE_INSTRUCTION_START(SPARSE_SWITCH) {
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+    BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      BACKWARD_BRANCH_INSTRUMENTATION(offset);
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -788,8 +788,8 @@
   HANDLE_INSTRUCTION_START(IF_EQ) {
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) == shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -806,8 +806,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -824,8 +824,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,8 +842,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -860,8 +860,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
     shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -878,8 +878,8 @@
     if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
       int16_t offset = inst->VRegC_22t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -895,8 +895,8 @@
   HANDLE_INSTRUCTION_START(IF_EQZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -912,8 +912,8 @@
   HANDLE_INSTRUCTION_START(IF_NEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -929,8 +929,8 @@
   HANDLE_INSTRUCTION_START(IF_LTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -946,8 +946,8 @@
   HANDLE_INSTRUCTION_START(IF_GEZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -963,8 +963,8 @@
   HANDLE_INSTRUCTION_START(IF_GTZ) {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -980,8 +980,8 @@
   HANDLE_INSTRUCTION_START(IF_LEZ)  {
     if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
       int16_t offset = inst->VRegB_21t();
+      BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        BACKWARD_BRANCH_INSTRUMENTATION(offset);
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bab0d40..c3b75b2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -69,9 +69,9 @@
     }                                                                                           \
   } while (false)
 
-#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+#define BRANCH_INSTRUMENTATION(offset) \
   do { \
-    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+    instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
   } while (false)
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
@@ -565,8 +565,8 @@
       case Instruction::GOTO: {
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -575,8 +575,8 @@
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -585,8 +585,8 @@
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -595,8 +595,8 @@
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -605,8 +605,8 @@
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
+        BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -709,8 +709,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) ==
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,8 +724,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) !=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -739,8 +739,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -754,8 +754,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -769,8 +769,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) >
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -784,8 +784,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_22t(inst_data)) <=
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -798,8 +798,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -812,8 +812,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -826,8 +826,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -840,8 +840,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -854,8 +854,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -868,8 +868,8 @@
         PREAMBLE();
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
+          BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 2a00f05..1bc58ac 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -276,7 +276,12 @@
    */
 #if defined(__linux__)
   hostent he;
-  char auxBuf[256];
+  // The size of the work buffer used in the gethostbyname_r call
+  // below. It used to be 128, but this was not enough on some
+  // configurations (maybe because of IPv6?), causing failures in JDWP
+  // host testing; thus it was increased to 256.
+  static constexpr size_t kAuxBufSize = 256;
+  char auxBuf[kAuxBufSize];
   int error;
   int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
   if (cc != 0) {
@@ -298,7 +303,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +319,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +446,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +488,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 4cbaf2c..6b47b67 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -165,11 +165,14 @@
   instrumentation_cache_->AddSamples(thread, method, 1);
 }
 
-void JitInstrumentationListener::BackwardBranch(Thread* thread,
-                                                ArtMethod* method,
-                                                int32_t dex_pc_offset) {
-  CHECK_LE(dex_pc_offset, 0);
-  instrumentation_cache_->AddSamples(thread, method, 1);
+void JitInstrumentationListener::Branch(Thread* thread,
+                                        ArtMethod* method,
+                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                        int32_t dex_pc_offset) {
+  if (dex_pc_offset < 0) {
+    // Increment method hotness if it is a backward branch.
+    instrumentation_cache_->AddSamples(thread, method, 1);
+  }
 }
 
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
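
Since Branch now fires on every taken branch rather than only backward ones, the old CHECK_LE(dex_pc_offset, 0) invariant no longer holds, and the JIT listener filters by the sign of the offset itself. A one-liner sketch of the rule (hypothetical helper name):

    #include <cstdint>

    // The branch target is dex_pc + dex_pc_offset, so a strictly negative
    // offset jumps backwards -- the loop back-edges whose iterations should
    // heat up the method's sample count.
    inline bool CountsTowardJitHotness(int32_t dex_pc_offset) {
      return dex_pc_offset < 0;
    }
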
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 15969e4..620c087 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -70,7 +70,7 @@
   void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
@@ -84,7 +84,7 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 36a967f..7f216f9 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -312,7 +312,7 @@
   // Update the oat file on disk if we can. This may fail, but that's okay.
   // Best effort is all that matters here.
   if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
-    LOG(WARNING) << error_msg;
+    LOG(INFO) << error_msg;
   }
 
   // Get the oat file on disk.
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 6cea902..ca456c2 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -39,6 +39,7 @@
   kIntrinsicFloatCvt,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicBitCount,
   kIntrinsicNumberOfLeadingZeros,
   kIntrinsicNumberOfTrailingZeros,
   kIntrinsicRotateRight,
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fc1a445..727ffe5 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -190,7 +190,7 @@
 class DumpCheckpoint FINAL : public Closure {
  public:
   explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
+      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(getpid())) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5815f7a..99b2296 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -815,10 +815,10 @@
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
-void Trace::BackwardBranch(Thread* /*thread*/, ArtMethod* method,
-                           int32_t /*dex_pc_offset*/)
+void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
+                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing" << PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
diff --git a/runtime/trace.h b/runtime/trace.h
index 356a81f..80f1a4c 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -164,7 +164,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8e9f12b..07f94c0 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1115,7 +1115,7 @@
   BacktraceMap* map = existing_map;
   std::unique_ptr<BacktraceMap> tmp_map;
   if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(tid));
+    tmp_map.reset(BacktraceMap::Create(getpid()));
     map = tmp_map.get();
   }
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1c95648..7e0f337 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2735,7 +2735,8 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range, is_super);
+      MethodType type = is_super ? METHOD_SUPER : METHOD_VIRTUAL;
+      ArtMethod* called_method = VerifyInvocationArgs(inst, type, is_range);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
@@ -2768,7 +2769,7 @@
     case Instruction::INVOKE_DIRECT:
     case Instruction::INVOKE_DIRECT_RANGE: {
       bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE);
-      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range, false);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_DIRECT, is_range);
       const char* return_type_descriptor;
       bool is_constructor;
       const RegType* return_type = nullptr;
@@ -2848,7 +2849,7 @@
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE: {
         bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE);
-        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range, false);
+        ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_STATIC, is_range);
         const char* descriptor;
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -2870,7 +2871,7 @@
     case Instruction::INVOKE_INTERFACE:
     case Instruction::INVOKE_INTERFACE_RANGE: {
       bool is_range =  (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range, false);
+      ArtMethod* abs_method = VerifyInvocationArgs(inst, METHOD_INTERFACE, is_range);
       if (abs_method != nullptr) {
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
@@ -3639,9 +3640,8 @@
   return *common_super;
 }
 
-// TODO Maybe I should just add a METHOD_SUPER to MethodType?
 ArtMethod* MethodVerifier::ResolveMethodAndCheckAccess(
-    uint32_t dex_method_idx, MethodType method_type, bool is_super) {
+    uint32_t dex_method_idx, MethodType method_type) {
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx);
   const RegType& klass_type = ResolveClassAndCheckAccess(method_id.class_idx_);
   if (klass_type.IsConflict()) {
@@ -3668,9 +3668,10 @@
       res_method = klass->FindDirectMethod(name, signature, pointer_size);
     } else if (method_type == METHOD_INTERFACE) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
-    } else if (is_super && klass->IsInterface()) {
+    } else if (method_type == METHOD_SUPER && klass->IsInterface()) {
       res_method = klass->FindInterfaceMethod(name, signature, pointer_size);
     } else {
+      DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
@@ -3679,7 +3680,9 @@
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
       // a class has changed, and will be flagged as an error in later checks.
-      if (method_type == METHOD_INTERFACE || method_type == METHOD_VIRTUAL) {
+      if (method_type == METHOD_INTERFACE ||
+          method_type == METHOD_VIRTUAL ||
+          method_type == METHOD_SUPER) {
         res_method = klass->FindDirectMethod(name, signature, pointer_size);
       }
       if (res_method == nullptr) {
@@ -3742,7 +3745,7 @@
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
-  if (res_method->IsPrivate() && method_type == METHOD_VIRTUAL) {
+  if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
                                       << PrettyMethod(res_method);
     return nullptr;
@@ -3751,7 +3754,9 @@
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
-      ((method_type == METHOD_VIRTUAL || method_type == METHOD_INTERFACE) && res_method->IsDirect())
+      ((method_type == METHOD_SUPER ||
+        method_type == METHOD_VIRTUAL ||
+        method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
                                        " type of " << PrettyMethod(res_method);
@@ -3937,12 +3942,12 @@
 };
 
 ArtMethod* MethodVerifier::VerifyInvocationArgs(
-    const Instruction* inst, MethodType method_type, bool is_range, bool is_super) {
+    const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
 
-  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type, is_super);
+  ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
     if (!have_pending_hard_failure_) {
@@ -3953,8 +3958,7 @@
 
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
   // has a vtable entry for the target method. Or the target is on an interface.
-  if (is_super) {
-    DCHECK(method_type == METHOD_VIRTUAL);
+  if (method_type == METHOD_SUPER) {
     if (res_method->GetDeclaringClass()->IsInterface()) {
       // TODO Fill in this part. Verify what we can...
       if (Runtime::Current()->IsAotCompiler()) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ec0a8f9..a26e0fb 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -57,7 +57,8 @@
   METHOD_UNKNOWN  = 0,
   METHOD_DIRECT,      // <init>, private
   METHOD_STATIC,      // static
-  METHOD_VIRTUAL,     // virtual, super
+  METHOD_VIRTUAL,     // virtual
+  METHOD_SUPER,       // super
   METHOD_INTERFACE    // interface
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
@@ -654,7 +655,7 @@
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type, bool is_super)
+  ArtMethod* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   /*
@@ -679,9 +680,7 @@
    * Returns the resolved method on success, null on failure (with *failure
    * set appropriately).
    */
-  ArtMethod* VerifyInvocationArgs(const Instruction* inst,
-                                          MethodType method_type,
-                                          bool is_range, bool is_super)
+  ArtMethod* VerifyInvocationArgs(const Instruction* inst, MethodType method_type, bool is_range)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Similar checks to the above, but on the proto. Will be used when the method cannot be
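
For context (illustrative, not part of the patch): the verifier now carries
METHOD_SUPER instead of METHOD_VIRTUAL plus an is_super flag because, with
default methods, invoke-super may name an interface method as well as a
superclass method. A minimal Java sketch of the two source forms that compile
to invoke-super; all class and method names here are made up for illustration:

    public class SuperInvokeDemo {
      interface Greeter {
        default String greet() { return "interface default"; }
      }

      static class Base {
        public String greet() { return "superclass"; }
      }

      static class Sub extends Base {
        @Override
        public String greet() {
          // Classic invoke-super: the target is a superclass method.
          return "sub -> " + super.greet();
        }
      }

      static class Impl implements Greeter {
        @Override
        public String greet() {
          // Java 8 interface super call: invoke-super naming an interface
          // method, the case the METHOD_SUPER checks above must now accept.
          return "impl -> " + Greeter.super.greet();
        }
      }

      public static void main(String[] args) {
        System.out.println(new Sub().greet());   // sub -> superclass
        System.out.println(new Impl().greet());  // impl -> interface default
      }
    }
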
diff --git a/test/004-StackWalk/src/Main.java b/test/004-StackWalk/src/Main.java
index 883ce2c..072b1d0 100644
--- a/test/004-StackWalk/src/Main.java
+++ b/test/004-StackWalk/src/Main.java
@@ -2,14 +2,14 @@
   public Main() {
   }
 
+  boolean doThrow = false;
+
   int $noinline$f() throws Exception {
     g(1);
     g(2);
 
-    // This loop currently defeats inlining of `f`.
-    for (int i = 0; i < 10; i++) {
-      Thread.sleep(0);
-    }
+    // This currently defeats inlining of `f`.
+    if (doThrow) { throw new Error(); }
     return 0;
   }
 
diff --git a/test/048-reflect-v8/build b/test/048-reflect-v8/build
new file mode 100644
index 0000000..4ea1838
--- /dev/null
+++ b/test/048-reflect-v8/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make us exit on a failure.
+set -e
+
+# Hard-wired use of experimental jack.
+# TODO: fix this temporary work-around for lambdas, see b/19467889
+export USE_JACK=true
+export JACK_SERVER=false
+export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks"
+# e.g. /foo/bar/jack-3.10.ALPHA.jar -> 3.10.ALPHA
+export JACK_VERSION="$(find "$JACK_REPOSITORY" -name '*ALPHA*' | sed 's/.*jack-//g' | sed 's/[.]jar//g')"
+
+./default-build "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/expected.txt b/test/048-reflect-v8/expected.txt
new file mode 100644
index 0000000..2d0b4cc
--- /dev/null
+++ b/test/048-reflect-v8/expected.txt
@@ -0,0 +1,4 @@
+Main$DefaultInterface is default = yes
+Main$RegularInterface is default = no
+Main$ImplementsWithDefault is default = yes
+Main$ImplementsWithRegular is default = no
diff --git a/test/048-reflect-v8/info.txt b/test/048-reflect-v8/info.txt
new file mode 100644
index 0000000..a336d30
--- /dev/null
+++ b/test/048-reflect-v8/info.txt
@@ -0,0 +1 @@
+Test reflection for the 1.8 APIs.
diff --git a/test/048-reflect-v8/run b/test/048-reflect-v8/run
new file mode 100644
index 0000000..ba3318a
--- /dev/null
+++ b/test/048-reflect-v8/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Ensure that default methods are turned on for dalvikvm and dex2oat
+${RUN} "$@" --experimental default-methods
diff --git a/test/048-reflect-v8/src/Main.java b/test/048-reflect-v8/src/Main.java
new file mode 100644
index 0000000..7fa2a92
--- /dev/null
+++ b/test/048-reflect-v8/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  interface DefaultInterface {
+    default void sayHi() {
+      System.out.println("hi default");
+    }
+  }
+
+  interface RegularInterface {
+    void sayHi();
+  }
+
+  class ImplementsWithDefault implements DefaultInterface {}
+  class ImplementsWithRegular implements RegularInterface {
+    public void sayHi() {
+      System.out.println("hello specific");
+    }
+  }
+
+  private static void printIsDefault(Class<?> klass) {
+    Method m;
+    try {
+      m = klass.getMethod("sayHi");
+    } catch (Throwable t) {
+      System.out.println(t);
+      return;
+    }
+
+    boolean isDefault = m.isDefault();
+    System.out.println(klass.getName() + " is default = " + (isDefault ? "yes" : "no"));
+  }
+
+  public static void main(String[] args) {
+    printIsDefault(DefaultInterface.class);
+    printIsDefault(RegularInterface.class);
+    printIsDefault(ImplementsWithDefault.class);
+    printIsDefault(ImplementsWithRegular.class);
+  }
+}
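
As an aside (not part of the patch): the expected output above follows from the
JDK 8 contract for Method.isDefault(), which is true exactly for a public,
non-abstract, non-static method declared in an interface. A sketch of an
equivalent check; the helper name isDefaultLike is made up here:

    import java.lang.reflect.Method;
    import java.lang.reflect.Modifier;

    class IsDefaultSketch {
      // True iff the method is public, non-abstract, non-static, and its
      // declaring class is an interface -- mirroring Method.isDefault().
      static boolean isDefaultLike(Method m) {
        int mods = m.getModifiers();
        return (mods & (Modifier.ABSTRACT | Modifier.PUBLIC | Modifier.STATIC))
                == Modifier.PUBLIC
            && m.getDeclaringClass().isInterface();
      }
    }

ImplementsWithDefault inherits sayHi() from the interface, so getMethod()
returns a Method whose declaring class is the interface (default = yes), while
ImplementsWithRegular declares its own sayHi(), so the declaring class is the
class itself (default = no).
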
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index d0b33b9..f8f0aa3 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -40,6 +40,9 @@
   /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
 
   public static void loop2(boolean incoming) {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (true) {
       System.out.println("foo");
       while (incoming) {}
@@ -170,6 +173,9 @@
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
 
   public static void loop9() {
+    // Add some code at entry to avoid having the entry block be a pre header.
+    // This avoids having to create a synthesized block.
+    System.out.println("Enter");
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
       boolean incoming = field;
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 710e849..a6a5bfe 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -21,7 +21,7 @@
 
 ## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
 
-## CHECK:      name             "B0"
+## CHECK:      name             "B1"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
@@ -39,12 +39,15 @@
 ## CHECK:      name             "<<BExtracted>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B0" "<<BCatch>>"
+## CHECK-NEXT: predecessors     "B1" "<<BCatch>>"
 ## CHECK-NOT:  flags            "catch_block"
 ## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
+    # Avoid having the entry block be a pre header, which would lead
+    # the cfg simplifier to add a synthesized block.
+    goto :catch_all
 
     :catch_all
     add-int/2addr p0, p1
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 549ed99..2d0688d 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -47,7 +47,7 @@
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Left:i\d+>>        ParameterValue
   /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
@@ -57,28 +57,6 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Left:i\d+>>        ParameterValue
-  /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
-  /// CHECK:                            Return [<<Add>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Left:i\d+>>        ParameterValue
-  /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
-  /// CHECK:                            Return [<<MulAdd>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Add
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
-  /// CHECK:                            mla w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
-
   public static int $opt$noinline$mulAdd(int acc, int left, int right) {
     if (doThrow) throw new Error();
     return acc + left * right;
@@ -100,7 +78,7 @@
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Left:j\d+>>        ParameterValue
   /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
@@ -110,28 +88,6 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
-  /// CHECK:                            Return [<<Sub>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
-  /// CHECK:                            Return [<<MulSub>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Sub
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
-  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
-
   public static long $opt$noinline$mulSub(long acc, long left, long right) {
     if (doThrow) throw new Error();
     return acc - left * right;
@@ -161,28 +117,7 @@
   /// CHECK:                            Return [<<Or>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Left:i\d+>>        ParameterValue
-  /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
-  /// CHECK:                            Return [<<Or>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Left:i\d+>>        ParameterValue
-  /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
-  /// CHECK:                            Return [<<Or>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
 
   public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
     if (doThrow) throw new Error();
@@ -216,30 +151,7 @@
   /// CHECK:                            Return [<<Res>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
-  /// CHECK:                            Return [<<Res>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
-  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
-  /// CHECK:                            Return [<<Res>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
 
 
   public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
@@ -264,7 +176,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Var:i\d+>>         ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
@@ -274,27 +186,6 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Var:i\d+>>         ParameterValue
-  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
-  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
-  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
-  /// CHECK:                            Return [<<Mul>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:i\d+>>         ParameterValue
-  /// CHECK:       <<Var:i\d+>>         ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
-  /// CHECK:                            Return [<<MulAdd>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Add
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
-  /// CHECK:                            mla w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
-
   public static int $opt$noinline$mulPlusOne(int acc, int var) {
     if (doThrow) throw new Error();
     return acc * (var + 1);
@@ -316,7 +207,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Var:j\d+>>         ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
@@ -326,123 +217,11 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (before)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Var:j\d+>>         ParameterValue
-  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
-  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
-  /// CHECK:                            Return [<<Mul>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
-  /// CHECK:       <<Acc:j\d+>>         ParameterValue
-  /// CHECK:       <<Var:j\d+>>         ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
-  /// CHECK:                            Return [<<MulSub>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Sub
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
-  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
-
   public static long $opt$noinline$mulMinusOne(long acc, long var) {
     if (doThrow) throw new Error();
     return acc * (1 - var);
   }
 
-  /**
-   * Test basic merging of `MUL+NEG` into `MULNEG`.
-   */
-
-  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (before)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
-  /// CHECK:                            Return [<<MulNeg>>]
-
-  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Neg
-
-  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) disassembly (after)
-  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (before)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
-
-  public static int $opt$noinline$mulNeg(int left, int right) {
-    if (doThrow) throw new Error();
-    return - (left * right);
-  }
-
-  /**
-   * Test basic merging of `MUL+NEG` into `MULNEG`.
-   */
-
-  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (before)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
-  /// CHECK:                            Return [<<MulNeg>>]
-
-  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Mul
-  /// CHECK-NOT:                        Neg
-
-  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) disassembly (after)
-  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (before)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
-  /// CHECK:       <<Left:j\d+>>        ParameterValue
-  /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
-  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
-  /// CHECK:                            Return [<<Neg>>]
-
-  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
-  /// CHECK-NOT:                        MultiplyAccumulate
-
-  public static long $opt$noinline$mulNeg(long left, long right) {
-    if (doThrow) throw new Error();
-    return - (left * right);
-  }
 
   public static void main(String[] args) {
     assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
@@ -451,7 +230,5 @@
     assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
     assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
     assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
-    assertIntEquals(-306, $opt$noinline$mulNeg(17, 18));
-    assertLongEquals(-380, $opt$noinline$mulNeg(19, 20));
   }
 }
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for the 32-bit and 64-bit bit count operations.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..b250145
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits32(int x) {
+    return Integer.bitCount(x);
+  }
+
+  // TODO: make this work when b/26700769 is done.
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-DAG: popcnt
+  //
+  // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+  // CHECK-NOT: call
+  private static int bits64(long x) {
+    return Long.bitCount(x);
+  }
+
+  public static void main(String[] args) {
+    expectEquals32(bits32(0x00000000), 0);
+    expectEquals32(bits32(0x00000001), 1);
+    expectEquals32(bits32(0x10000000), 1);
+    expectEquals32(bits32(0x10000001), 2);
+    expectEquals32(bits32(0x00000003), 2);
+    expectEquals32(bits32(0x70000000), 3);
+    expectEquals32(bits32(0x000F0000), 4);
+    expectEquals32(bits32(0x00001111), 4);
+    expectEquals32(bits32(0x11110000), 4);
+    expectEquals32(bits32(0x11111111), 8);
+    expectEquals32(bits32(0x12345678), 13);
+    expectEquals32(bits32(0x9ABCDEF0), 19);
+    expectEquals32(bits32(0xFFFFFFFF), 32);
+
+    for (int i = 0; i < 32; i++) {
+      expectEquals32(bits32(1 << i), 1);
+    }
+
+    expectEquals64(bits64(0x0000000000000000L), 0);
+    expectEquals64(bits64(0x0000000000000001L), 1);
+    expectEquals64(bits64(0x1000000000000000L), 1);
+    expectEquals64(bits64(0x1000000000000001L), 2);
+    expectEquals64(bits64(0x0000000000000003L), 2);
+    expectEquals64(bits64(0x7000000000000000L), 3);
+    expectEquals64(bits64(0x000F000000000000L), 4);
+    expectEquals64(bits64(0x0000000011111111L), 8);
+    expectEquals64(bits64(0x1111111100000000L), 8);
+    expectEquals64(bits64(0x1111111111111111L), 16);
+    expectEquals64(bits64(0x123456789ABCDEF1L), 33);
+    expectEquals64(bits64(0xFFFFFFFFFFFFFFFFL), 64);
+
+    for (int i = 0; i < 64; i++) {
+      expectEquals64(bits64(1L << i), 1);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+  private static void expectEquals64(long result, long expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
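
For reference (not part of the patch): on x86-64 the new intrinsic lets the
compiler emit a single popcnt instruction, which the commented-out CHECK lines
above will start asserting once b/26700769 is resolved. The library fallback it
replaces is the classic parallel (SWAR) reduction; a sketch of the 32-bit form,
which computes the same value as Integer.bitCount():

    class PopcountSketch {
      // Fold adjacent 1-bit counts into 2-bit, 4-bit, then 8-bit sums, and
      // gather the four byte sums into the top byte with one multiply.
      static int popcount32(int x) {
        x = x - ((x >>> 1) & 0x55555555);
        x = (x & 0x33333333) + ((x >>> 2) & 0x33333333);
        x = (x + (x >>> 4)) & 0x0f0f0f0f;
        return (x * 0x01010101) >>> 24;
      }
    }
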
diff --git a/test/564-checker-inline-loop/expected.txt b/test/564-checker-inline-loop/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/564-checker-inline-loop/expected.txt
diff --git a/test/564-checker-inline-loop/info.txt b/test/564-checker-inline-loop/info.txt
new file mode 100644
index 0000000..a590bc6
--- /dev/null
+++ b/test/564-checker-inline-loop/info.txt
@@ -0,0 +1 @@
+Tests inlining of loops in the optimizing compiler.
diff --git a/test/564-checker-inline-loop/src/Main.java b/test/564-checker-inline-loop/src/Main.java
new file mode 100644
index 0000000..6929913
--- /dev/null
+++ b/test/564-checker-inline-loop/src/Main.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineLoop() inliner (before)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-DAG:                      Return [<<Invoke>>]
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+
+  /// CHECK-START: int Main.inlineLoop() inliner (after)
+  /// CHECK-DAG:     <<Constant:i\d+>>   IntConstant 42
+  /// CHECK-DAG:                         Return [<<Constant>>]
+
+  /// CHECK-START: int Main.inlineLoop() licm (after)
+  /// CHECK:                         Goto loop:{{B\d+}}
+
+  public static int inlineLoop() {
+    return loopMethod();
+  }
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() inliner (after)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.inlineWithinLoop() licm (after)
+  /// CHECK-DAG:  Goto loop:<<OuterLoop:B\d+>> outer_loop:none
+  /// CHECK-DAG:  Goto outer_loop:<<OuterLoop>>
+
+  public static void inlineWithinLoop() {
+    while (doLoop) {
+      loopMethod();
+    }
+  }
+
+  public static int loopMethod() {
+    while (doLoop) {}
+    return 42;
+  }
+
+  public static boolean doLoop = false;
+
+  public static void main(String[] args) {
+    inlineLoop();
+    inlineWithinLoop();
+  }
+}
diff --git a/test/564-checker-irreducible-loop/expected.txt b/test/564-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/564-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/564-checker-irreducible-loop/info.txt b/test/564-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/564-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..b82ed92
--- /dev/null
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,61 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination (before)
+## CHECK-DAG: <<Method:(i|j)\d+>> CurrentMethod
+## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:{{B\d+}} irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>,<<Method>>] loop:none
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part: a block uses the ArtMethod and branches to
+   # a block that doesn't. The register allocator used to trip there, as the
+   # ArtMethod was a live_in of the last block before the loop, but did not have
+   # a location due to our liveness analysis.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->$noinline$m(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static $noinline$m(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Prevent inlining.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
diff --git a/test/564-checker-irreducible-loop/src/Main.java b/test/564-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..94e3357
--- /dev/null
+++ b/test/564-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 36dd9f4..48e10c3 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -621,6 +621,11 @@
 
 TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS :=
 
+# Test is flaky; see b/26733951.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),961-default-iface-resolution-generated,$(ALL_ADDRESS_SIZES))
+
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \