Merge "Run HeapLocationCollector once in scheduler instead of locally."
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 2688c04..247f4e3 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -18,7 +18,9 @@
 
 ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
-ART_CPPLINT_FLAGS := --root=$(TOP)
+# Use `pwd` instead of $TOP for --root, since $TOP is always '.' and --root doesn't
+# seem to work with a relative path (b/34787652).
+ART_CPPLINT_FLAGS := --root=`pwd`
 ART_CPPLINT_QUIET := --quiet
 ART_CPPLINT_INGORED := \
     runtime/elf.h \
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 872f7ea..c7e9f4f 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -16,6 +16,7 @@
 
 #include "quick_compiler_callbacks.h"
 
+#include "driver/compiler_driver.h"
 #include "verification_results.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -33,4 +34,17 @@
   }
 }
 
+bool QuickCompilerCallbacks::CanAssumeVerified(ClassReference ref) {
+  // If we don't have class unloading enabled in the compiler, we will never see classes that
+  // were previously verified. Return false to avoid overhead from the lookup in the compiler driver.
+  if (!does_class_unloading_) {
+    return false;
+  }
+  DCHECK(compiler_driver_ != nullptr);
+  // In the case of the quicken filter, this avoids verification of quickened instructions, which
+  // the verifier doesn't currently support.
+  // In the case of the verify filter, it avoids verifying twice.
+  return compiler_driver_->CanAssumeVerified(ref);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index a3a6c09..578aff4 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -22,6 +22,7 @@
 
 namespace art {
 
+class CompilerDriver;
 class VerificationResults;
 
 class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
@@ -53,8 +54,19 @@
       verification_results_ = verification_results;
     }
 
+    bool CanAssumeVerified(ClassReference ref) OVERRIDE;
+
+    void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver)
+        OVERRIDE {
+      does_class_unloading_ = does_class_unloading;
+      compiler_driver_ = compiler_driver;
+      DCHECK(!does_class_unloading || compiler_driver_ != nullptr);
+    }
+
   private:
     VerificationResults* verification_results_ = nullptr;
+    bool does_class_unloading_ = false;
+    CompilerDriver* compiler_driver_ = nullptr;
     std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
 };
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 6e087c5..037e458 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -3040,4 +3040,10 @@
   }
 }
 
+bool CompilerDriver::CanAssumeVerified(ClassReference ref) const {
+  mirror::Class::Status existing = mirror::Class::kStatusNotReady;
+  compiled_classes_.Get(DexFileReference(ref.first, ref.second), &existing);
+  return existing >= mirror::Class::kStatusVerified;
+}
+
 }  // namespace art
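
CanAssumeVerified() above leans on mirror::Class::Status being an ordered enum: a missing entry in compiled_classes_ behaves like kStatusNotReady, and anything at or past kStatusVerified counts as verified. A minimal standalone sketch of that lookup-and-compare idea, using a hypothetical Status enum and a plain std::map in place of the ART types:

    #include <cassert>
    #include <map>

    // Hypothetical, simplified stand-ins for mirror::Class::Status and the
    // compiled-classes table; the ordering mimics "not ready < retry < verified".
    enum class Status { kNotReady = 0, kRetryVerificationAtRuntime, kVerified, kInitialized };
    using ClassKey = int;  // stand-in for ClassReference (dex file, class def index)

    bool CanAssumeVerified(const std::map<ClassKey, Status>& compiled, ClassKey key) {
      auto it = compiled.find(key);
      // Absent entries behave like kNotReady, matching the Get() default above.
      Status existing = (it != compiled.end()) ? it->second : Status::kNotReady;
      return existing >= Status::kVerified;
    }

    int main() {
      std::map<ClassKey, Status> compiled = {
          {1, Status::kVerified}, {2, Status::kRetryVerificationAtRuntime}};
      assert(CanAssumeVerified(compiled, 1));   // verified or better
      assert(!CanAssumeVerified(compiled, 2));  // still needs runtime verification
      assert(!CanAssumeVerified(compiled, 3));  // never recorded at all
      return 0;
    }
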
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d9886a2..ba4581c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -377,6 +377,8 @@
     return profile_compilation_info_;
   }
 
+  bool CanAssumeVerified(ClassReference ref) const;
+
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 4da3e0d..392d57c 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "compiler_callbacks.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "gc/heap.h"
@@ -368,7 +369,9 @@
 
 // Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the
 // driver.
-TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) {
+// Also test that classes can be assumed verified if class unloading mode is enabled and
+// the class status is at least verified.
+TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) {
   Thread* const self = Thread::Current();
   jobject class_loader;
   std::vector<const DexFile*> dex_files;
@@ -382,6 +385,7 @@
     dex_file = dex_files.front();
   }
   compiler_driver_->SetDexFilesForOatFile(dex_files);
+  callbacks_->SetDoesClassUnloading(true, compiler_driver_.get());
   ClassReference ref(dex_file, 0u);
   // Test that the status is read from the compiler driver as expected.
   for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime;
@@ -397,6 +401,12 @@
     mirror::Class::Status status = {};
     ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status));
     EXPECT_EQ(status, expected_status);
+
+    // Check that a class whose status is at least verified can be assumed verified.
+    if (status >= mirror::Class::kStatusVerified) {
+      // The callbacks delegate this query to the compiler driver.
+      EXPECT_TRUE(callbacks_->CanAssumeVerified(ref)) << status;
+    }
   }
 }
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index c166deb..2f96cfa 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1121,6 +1121,66 @@
     }
   }
 
+  void VisitRem(HRem* instruction) OVERRIDE {
+    HInstruction* left = instruction->GetLeft();
+    HInstruction* right = instruction->GetRight();
+
+    // Handle an 'i % CONST' expression used as an array index, e.g.:
+    //   array[i % 20];
+    if (right->IsIntConstant()) {
+      int32_t right_const = std::abs(right->AsIntConstant()->GetValue());
+      if (right_const == 0) {
+        return;
+      }
+      // The sign of the divisor CONST doesn't affect the sign of the final value range.
+      // For example:
+      // if (i > 0) {
+      //   array[i % 10];  // index value range [0, 9]
+      //   array[i % -10]; // index value range [0, 9]
+      // }
+      ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+          GetGraph()->GetArena(),
+          ValueBound(nullptr, 1 - right_const),
+          ValueBound(nullptr, right_const - 1));
+
+      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      if (left_range != nullptr) {
+        right_range = left_range->Narrow(right_range);
+      }
+      AssignRange(instruction->GetBlock(), instruction, right_range);
+      return;
+    }
+
+    // Handle the following pattern:
+    // i0 NullCheck
+    // i1 ArrayLength[i0]
+    // i2 DivByZeroCheck [i1]  <-- right
+    // i3 Rem [i5, i2]         <-- we are here.
+    // i4 BoundsCheck [i3,i1]
+    if (right->IsDivZeroCheck()) {
+      // If array_length passes the div-by-zero check,
+      // array_length must be > 0.
+      right = right->AsDivZeroCheck()->InputAt(0);
+    }
+
+    // Handle an 'i % array.length' expression used as an array index, e.g.:
+    //   array[(i+7) % array.length];
+    if (right->IsArrayLength()) {
+      ValueBound lower = ValueBound::Min();  // ideally, lower should be '1-array_length'.
+      ValueBound upper = ValueBound(right, -1);  // array_length - 1
+      ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+          GetGraph()->GetArena(),
+          lower,
+          upper);
+      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      if (left_range != nullptr) {
+        right_range = left_range->Narrow(right_range);
+      }
+      AssignRange(instruction->GetBlock(), instruction, right_range);
+      return;
+    }
+  }
+
   void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->GetLength();
     if (!len->IsIntConstant()) {
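
The range logic in VisitRem above rests on a property of integer remainder: for a non-zero constant divisor c, i % c always lies in [1 - |c|, |c| - 1], and for non-negative i it lies in [0, |c| - 1], so the sign of the divisor never widens the index range. A small self-contained check of that property (plain C++, not ART code):

    #include <cassert>
    #include <cstdlib>
    #include <initializer_list>

    int main() {
      for (int c : {10, -10, 1, 7, 20}) {
        int abs_c = std::abs(c);
        for (int i = -1000; i <= 1000; ++i) {
          int r = i % c;  // C++ remainder truncates toward zero
          assert(r >= 1 - abs_c && r <= abs_c - 1);
          if (i >= 0) {
            // A non-negative dividend yields a safe index for any array of length >= |c|.
            assert(r >= 0 && r <= abs_c - 1);
          }
        }
      }
      return 0;
    }
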
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 575e2fc..2aaf058 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -951,4 +951,152 @@
   ASSERT_TRUE(IsRemoved(bounds_check6));
 }
 
+// int[] array = new int[10];
+// for (int i=0; i<200; i++) {
+//   array[i%10] = 10;            // Can eliminate
+//   array[i%1] = 10;             // Can eliminate
+//   array[i%200] = 10;           // Cannot eliminate
+//   array[i%-10] = 10;           // Can eliminate
+//   array[i%array.length] = 10;  // Can eliminate
+//   array[param_i%10] = 10;      // Can't eliminate, as param_i may be < 0
+// }
+TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* param_i = new (&allocator_)
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+  entry->AddInstruction(param_i);
+
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
+  HInstruction* constant_10 = graph_->GetIntConstant(10);
+  HInstruction* constant_200 = graph_->GetIntConstant(200);
+  HInstruction* constant_minus_10 = graph_->GetIntConstant(-10);
+
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
+  entry->AddSuccessor(block);
+  // We pass a bogus constant for the class to avoid mocking one.
+  HInstruction* new_array = new (&allocator_) HNewArray(constant_10, constant_10, 0);
+  block->AddInstruction(new_array);
+  block->AddInstruction(new (&allocator_) HGoto());
+
+  HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_);
+  HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+
+  graph_->AddBlock(loop_header);
+  graph_->AddBlock(loop_body);
+  graph_->AddBlock(exit);
+  block->AddSuccessor(loop_header);
+  loop_header->AddSuccessor(exit);       // true successor
+  loop_header->AddSuccessor(loop_body);  // false successor
+  loop_body->AddSuccessor(loop_header);
+
+  HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+  HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi, constant_200);
+  HInstruction* if_inst = new (&allocator_) HIf(cmp);
+  loop_header->AddPhi(phi);
+  loop_header->AddInstruction(cmp);
+  loop_header->AddInstruction(if_inst);
+  phi->AddInput(constant_0);
+
+  //////////////////////////////////////////////////////////////////////////////////
+  // LOOP BODY:
+  // array[i % 10] = 10;
+  HRem* i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_10, 0);
+  HBoundsCheck* bounds_check_i_mod_10 = new (&allocator_) HBoundsCheck(i_mod_10, constant_10, 0);
+  HInstruction* array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_10);
+  loop_body->AddInstruction(bounds_check_i_mod_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % 1] = 10;
+  HRem* i_mod_1 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_1, 0);
+  HBoundsCheck* bounds_check_i_mod_1 = new (&allocator_) HBoundsCheck(i_mod_1, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_1, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_1);
+  loop_body->AddInstruction(bounds_check_i_mod_1);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % 200] = 10;
+  HRem* i_mod_200 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_200, 0);
+  HBoundsCheck* bounds_check_i_mod_200 = new (&allocator_) HBoundsCheck(i_mod_200, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_200, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_200);
+  loop_body->AddInstruction(bounds_check_i_mod_200);
+  loop_body->AddInstruction(array_set);
+
+  // array[i % -10] = 10;
+  HRem* i_mod_minus_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_minus_10, 0);
+  HBoundsCheck* bounds_check_i_mod_minus_10 = new (&allocator_) HBoundsCheck(
+      i_mod_minus_10, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_i_mod_minus_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(i_mod_minus_10);
+  loop_body->AddInstruction(bounds_check_i_mod_minus_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[i%array.length] = 10;
+  HNullCheck* null_check = new (&allocator_) HNullCheck(new_array, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
+  HRem* i_mod_array_length = new (&allocator_) HRem(Primitive::kPrimInt, phi, array_length, 0);
+  HBoundsCheck* bounds_check_i_mod_array_len = new (&allocator_) HBoundsCheck(
+      i_mod_array_length, array_length, 0);
+  array_set = new (&allocator_) HArraySet(
+      null_check, bounds_check_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(null_check);
+  loop_body->AddInstruction(array_length);
+  loop_body->AddInstruction(i_mod_array_length);
+  loop_body->AddInstruction(bounds_check_i_mod_array_len);
+  loop_body->AddInstruction(array_set);
+
+  // array[param_i % 10] = 10;
+  HRem* param_i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, param_i, constant_10, 0);
+  HBoundsCheck* bounds_check_param_i_mod_10 = new (&allocator_) HBoundsCheck(
+      param_i_mod_10, constant_10, 0);
+  array_set = new (&allocator_) HArraySet(
+      new_array, bounds_check_param_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(param_i_mod_10);
+  loop_body->AddInstruction(bounds_check_param_i_mod_10);
+  loop_body->AddInstruction(array_set);
+
+  // array[param_i%array.length] = 10;
+  null_check = new (&allocator_) HNullCheck(new_array, 0);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
+  HRem* param_i_mod_array_length = new (&allocator_) HRem(
+      Primitive::kPrimInt, param_i, array_length, 0);
+  HBoundsCheck* bounds_check_param_i_mod_array_len = new (&allocator_) HBoundsCheck(
+      param_i_mod_array_length, array_length, 0);
+  array_set = new (&allocator_) HArraySet(
+      null_check, bounds_check_param_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+  loop_body->AddInstruction(null_check);
+  loop_body->AddInstruction(array_length);
+  loop_body->AddInstruction(param_i_mod_array_length);
+  loop_body->AddInstruction(bounds_check_param_i_mod_array_len);
+  loop_body->AddInstruction(array_set);
+
+  // i++;
+  HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, constant_1);
+  loop_body->AddInstruction(add);
+  loop_body->AddInstruction(new (&allocator_) HGoto());
+  phi->AddInput(add);
+  //////////////////////////////////////////////////////////////////////////////////
+
+  exit->AddInstruction(new (&allocator_) HExit());
+
+  RunBCE();
+
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_10));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_1));
+  ASSERT_FALSE(IsRemoved(bounds_check_i_mod_200));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_minus_10));
+  ASSERT_TRUE(IsRemoved(bounds_check_i_mod_array_len));
+  ASSERT_FALSE(IsRemoved(bounds_check_param_i_mod_10));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index ab1772b..0b980f5 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -151,6 +151,16 @@
   }
 
   /**
+   * Checks if the given phi instruction has been classified by induction
+   * variable analysis. Returns false for anything that cannot be classified
+   * statically, such as reductions or other complex cycles.
+   */
+  bool IsClassified(HPhi* phi) const {
+    HLoopInformation* lp = phi->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    return (lp != nullptr) && (induction_analysis_->LookupInfo(lp, phi) != nullptr);
+  }
+
+  /**
    * Checks if header logic of a loop terminates. Sets trip-count tc if known.
    */
   bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 0ef7dcd..4143462 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -256,6 +256,35 @@
   return false;
 }
 
+// Detect reductions of the following forms,
+// under the assumption that the phi has only *one* use:
+//   x = x_phi + ..
+//   x = x_phi - ..
+//   x = max(x_phi, ..)
+//   x = min(x_phi, ..)
+static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
+  if (reduction->IsAdd()) {
+    return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+  } else if (reduction->IsSub()) {
+    return reduction->InputAt(0) == phi;
+  } else if (reduction->IsInvokeStaticOrDirect()) {
+    switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+      case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMaxIntInt:
+      case Intrinsics::kMathMaxLongLong:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMaxDoubleDouble:
+        return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
 // Test vector restrictions.
 static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
   return (restrictions & tested) != 0;
@@ -280,12 +309,11 @@
       return false;
     }
   }
-
   return true;
 }
 
 //
-// Class methods.
+// Public methods.
 //
 
 HLoopOptimization::HLoopOptimization(HGraph* graph,
@@ -299,7 +327,7 @@
       top_loop_(nullptr),
       last_loop_(nullptr),
       iset_(nullptr),
-      induction_simplication_count_(0),
+      reductions_(nullptr),
       simplified_(false),
       vector_length_(0),
       vector_refs_(nullptr),
@@ -333,6 +361,10 @@
   last_loop_ = top_loop_ = nullptr;
 }
 
+//
+// Loop setup and traversal.
+//
+
 void HLoopOptimization::LocalRun() {
   // Build the linear order using the phase-local allocator. This step enables building
   // a loop hierarchy that properly reflects the outer-inner and previous-next relation.
@@ -351,17 +383,21 @@
   // should use the global allocator.
   if (top_loop_ != nullptr) {
     ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSafeMap<HInstruction*, HInstruction*> reds(
+        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     ArenaSafeMap<HInstruction*, HInstruction*> map(
         std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
     // Attach.
     iset_ = &iset;
+    reductions_ = &reds;
     vector_refs_ = &refs;
     vector_map_ = &map;
     // Traverse.
     TraverseLoopsInnerToOuter(top_loop_);
     // Detach.
     iset_ = nullptr;
+    reductions_ = nullptr;
     vector_refs_ = nullptr;
     vector_map_ = nullptr;
   }
@@ -414,16 +450,12 @@
   }
 }
 
-void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+  bool changed = false;
   for ( ; node != nullptr; node = node->next) {
-    // Visit inner loops first.
-    uint32_t current_induction_simplification_count = induction_simplication_count_;
-    if (node->inner != nullptr) {
-      TraverseLoopsInnerToOuter(node->inner);
-    }
-    // Recompute induction information of this loop if the induction
-    // of any inner loop has been simplified.
-    if (current_induction_simplification_count != induction_simplication_count_) {
+    // Visit inner loops first. Recompute induction information for this
+    // loop if the induction of any inner loop has changed.
+    if (TraverseLoopsInnerToOuter(node->inner)) {
       induction_range_.ReVisit(node->loop_info);
     }
     // Repeat simplifications in the loop-body until no more changes occur.
@@ -433,12 +465,14 @@
       simplified_ = false;
       SimplifyInduction(node);
       SimplifyBlocks(node);
+      changed = simplified_ || changed;
     } while (simplified_);
     // Optimize inner loop.
     if (node->inner == nullptr) {
-      OptimizeInnerLoop(node);
+      changed = OptimizeInnerLoop(node) || changed;
     }
   }
+  return changed;
 }
 
 //
@@ -455,21 +489,14 @@
   //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
-    iset_->clear();  // prepare phi induction
     if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
+        CanRemoveCycle() &&
         TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
-      // Note that it's ok to have replaced uses after the loop with the last value, without
-      // being able to remove the cycle. Environment uses (which are the reason we may not be
-      // able to remove the cycle) within the loop will still hold the right value.
-      if (CanRemoveCycle()) {
-        for (HInstruction* i : *iset_) {
-          RemoveFromCycle(i);
-        }
-
-        // Check that there are no records of the deleted instructions.
-        DCHECK(CheckInductionSetFullyRemoved(iset_));
-        simplified_ = true;
+      simplified_ = true;
+      for (HInstruction* i : *iset_) {
+        RemoveFromCycle(i);
       }
+      DCHECK(CheckInductionSetFullyRemoved(iset_));
     }
   }
 }
@@ -511,21 +538,20 @@
   }
 }
 
-void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
   // Ensure loop header logic is finite.
   int64_t trip_count = 0;
   if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
-    return;
+    return false;
   }
-
   // Ensure there is only a single loop-body (besides the header).
   HBasicBlock* body = nullptr;
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
     if (it.Current() != header) {
       if (body != nullptr) {
-        return;
+        return false;
       }
       body = it.Current();
     }
@@ -533,27 +559,27 @@
   CHECK(body != nullptr);
   // Ensure there is only a single exit point.
   if (header->GetSuccessors().size() != 2) {
-    return;
+    return false;
   }
   HBasicBlock* exit = (header->GetSuccessors()[0] == body)
       ? header->GetSuccessors()[1]
       : header->GetSuccessors()[0];
   // Ensure exit can only be reached by exiting loop.
   if (exit->GetPredecessors().size() != 1) {
-    return;
+    return false;
   }
   // Detect either an empty loop (no side effects other than plain iteration) or
   // a trivial loop (just iterating once). Replace subsequent index uses, if any,
   // with the last value and remove the loop, possibly after unrolling its body.
-  HInstruction* phi = header->GetFirstPhi();
-  iset_->clear();  // prepare phi induction
-  if (TrySetSimpleLoopHeader(header)) {
+  HPhi* main_phi = nullptr;
+  if (TrySetSimpleLoopHeader(header, &main_phi)) {
     bool is_empty = IsEmptyBody(body);
-    if ((is_empty || trip_count == 1) &&
-        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+    if (reductions_->empty() &&  // TODO: possible with some effort
+        (is_empty || trip_count == 1) &&
+        TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
       if (!is_empty) {
         // Unroll the loop-body, which sees initial value of the index.
-        phi->ReplaceWith(phi->InputAt(0));
+        main_phi->ReplaceWith(main_phi->InputAt(0));
         preheader->MergeInstructionsWith(body);
       }
       body->DisconnectAndDelete();
@@ -566,21 +592,20 @@
       preheader->AddDominatedBlock(exit);
       exit->SetDominator(preheader);
       RemoveLoop(node);  // update hierarchy
-      return;
+      return true;
     }
   }
-
   // Vectorize loop, if possible and valid.
-  if (kEnableVectorization) {
-    iset_->clear();  // prepare phi induction
-    if (TrySetSimpleLoopHeader(header) &&
-        ShouldVectorize(node, body, trip_count) &&
-        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
-      Vectorize(node, body, exit, trip_count);
-      graph_->SetHasSIMD(true);  // flag SIMD usage
-      return;
-    }
+  if (kEnableVectorization &&
+      TrySetSimpleLoopHeader(header, &main_phi) &&
+      reductions_->empty() &&  // TODO: possible with some effort
+      ShouldVectorize(node, body, trip_count) &&
+      TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
+    Vectorize(node, body, exit, trip_count);
+    graph_->SetHasSIMD(true);  // flag SIMD usage
+    return true;
   }
+  return false;
 }
 
 //
@@ -621,6 +646,8 @@
   // aliased, as well as the property that references either point to the same
   // array or to two completely disjoint arrays, i.e., no partial aliasing.
   // Other than a few simply heuristics, no detailed subscript analysis is done.
+  // The scan over references also finds a suitable dynamic loop peeling candidate.
+  const ArrayReference* candidate = nullptr;
   for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
     for (auto j = i; ++j != vector_refs_->end(); ) {
       if (i->type == j->type && (i->lhs || j->lhs)) {
@@ -656,7 +683,7 @@
   }
 
   // Consider dynamic loop peeling for alignment.
-  SetPeelingCandidate(trip_count);
+  SetPeelingCandidate(candidate, trip_count);
 
   // Success!
   return true;
@@ -679,14 +706,15 @@
   bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;
 
   // Adjust vector bookkeeping.
-  iset_->clear();  // prepare phi induction
-  bool is_simple_loop_header = TrySetSimpleLoopHeader(header);  // fills iset_
+  HPhi* main_phi = nullptr;
+  bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi);  // refills sets
   DCHECK(is_simple_loop_header);
   vector_header_ = header;
   vector_body_ = block;
 
-  // Generate dynamic loop peeling trip count, if needed:
-  // ptc = <peeling-needed-for-candidate>
+  // Generate dynamic loop peeling trip count, if needed, under the assumption
+  // that the Android runtime guarantees at least "component size" alignment:
+  // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
   HInstruction* ptc = nullptr;
   if (vector_peeling_candidate_ != nullptr) {
     DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
@@ -775,6 +803,7 @@
   while (!header->GetFirstInstruction()->IsGoto()) {
     header->RemoveInstruction(header->GetFirstInstruction());
   }
+
   // Update loop hierarchy: the old header now resides in the same outer loop
   // as the old preheader. Note that we don't bother putting sequential
   // loops back in the hierarchy at this point.
@@ -841,13 +870,12 @@
     vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
     Insert(vector_body_, vector_index_);
   }
-  // Finalize phi for the loop index.
+  // Finalize phi inputs for the loop index.
   phi->AddInput(lo);
   phi->AddInput(vector_index_);
   vector_index_ = phi;
 }
 
-// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
 bool HLoopOptimization::VectorizeDef(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code) {
@@ -1118,7 +1146,7 @@
     case kArm:
     case kThumb2:
       // Allow vectorization for all ARM devices, because Android assumes that
-      // ARM 32-bit always supports advanced SIMD.
+      // ARM 32-bit always supports advanced SIMD (64-bit SIMD).
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
@@ -1137,7 +1165,7 @@
       return false;
     case kArm64:
       // Allow vectorization for all ARM devices, because Android assumes that
-      // ARMv8 AArch64 always supports advanced SIMD.
+      // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
@@ -1162,7 +1190,7 @@
       }
     case kX86:
     case kX86_64:
-      // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors).
+      // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
       if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
         switch (type) {
           case Primitive::kPrimBoolean:
@@ -1310,8 +1338,6 @@
           global_allocator_, base, opa, type, vector_length_, is_string_char_at);
     }
     // Known dynamically enforced alignment?
-    // TODO: detect offset + constant differences.
-    // TODO: long run, static alignment analysis?
     if (vector_peeling_candidate_ != nullptr &&
         vector_peeling_candidate_->base == base &&
         vector_peeling_candidate_->offset == offset) {
@@ -1586,9 +1612,11 @@
   return true;
 }
 
-void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) {
+void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate,
+                                            int64_t trip_count ATTRIBUTE_UNUSED) {
   // Current heuristic: none.
   // TODO: implement
+  vector_peeling_candidate_ = candidate;
 }
 
 uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
@@ -1616,13 +1644,17 @@
 //
 
 bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+  // Start with empty phi induction.
+  iset_->clear();
+
   // Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't
   // smart enough to follow strongly connected components (and it's probably not worth
   // it to make it so). See b/33775412.
   if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) {
     return false;
   }
-  DCHECK(iset_->empty());
+
+  // Lookup phi induction cycle.
   ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
   if (set != nullptr) {
     for (HInstruction* i : *set) {
@@ -1634,6 +1666,7 @@
       } else if (!i->IsRemovable()) {
         return false;
       } else if (i != phi && restrict_uses) {
+        // Deal with regular uses.
         for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
           if (set->find(use.GetUser()) == set->end()) {
             return false;
@@ -1647,17 +1680,65 @@
   return false;
 }
 
-// Find: phi: Phi(init, addsub)
-//       s:   SuspendCheck
-//       c:   Condition(phi, bound)
-//       i:   If(c)
-// TODO: Find a less pattern matching approach?
-bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) {
   DCHECK(iset_->empty());
-  HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr &&
-      phi->GetNext() == nullptr &&
-      TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
+  // Only unclassified phi cycles are candidates for reductions.
+  if (induction_range_.IsClassified(phi)) {
+    return false;
+  }
+  // Accept operations like x = x + .., provided that the phi and the reduction are
+  // used exactly once inside the loop, and by each other.
+  HInputsRef inputs = phi->GetInputs();
+  if (inputs.size() == 2) {
+    HInstruction* reduction = inputs[1];
+    if (HasReductionFormat(reduction, phi)) {
+      HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
+      int32_t use_count = 0;
+      bool single_use_inside_loop =
+          // Reduction update only used by phi.
+          reduction->GetUses().HasExactlyOneElement() &&
+          !reduction->HasEnvironmentUses() &&
+          // Reduction update is only use of phi inside the loop.
+          IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
+          iset_->size() == 1;
+      iset_->clear();  // leave it the way you found it
+      if (single_use_inside_loop) {
+        // Link reduction back, and start recording feed value.
+        reductions_->Put(reduction, phi);
+        reductions_->Put(phi, phi->InputAt(0));
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) {
+  // Start with empty phi induction and reductions.
+  iset_->clear();
+  reductions_->clear();
+
+  // Scan the phis to find the following (the induction structure has already
+  // been optimized, so we don't need to worry about trivial cases):
+  // (1) optional reductions in the loop,
+  // (2) the main induction, used in loop control.
+  HPhi* phi = nullptr;
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    if (TrySetPhiReduction(it.Current()->AsPhi())) {
+      continue;
+    } else if (phi == nullptr) {
+      // Found the first candidate for main induction.
+      phi = it.Current()->AsPhi();
+    } else {
+      return false;
+    }
+  }
+
+  // Then test for a typical loop header:
+  //   s:  SuspendCheck
+  //   c:  Condition(phi, bound)
+  //   i:  If(c)
+  if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) {
     HInstruction* s = block->GetFirstInstruction();
     if (s != nullptr && s->IsSuspendCheck()) {
       HInstruction* c = s->GetNext();
@@ -1669,6 +1750,7 @@
         if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
           iset_->insert(c);
           iset_->insert(s);
+          *main_phi = phi;
           return true;
         }
       }
@@ -1692,6 +1774,7 @@
 
 bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
                                           HInstruction* instruction) {
+  // Deal with regular uses.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
       return true;
@@ -1704,6 +1787,7 @@
                                             HInstruction* instruction,
                                             bool collect_loop_uses,
                                             /*out*/ int32_t* use_count) {
+  // Deal with regular uses.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     HInstruction* user = use.GetUser();
     if (iset_->find(user) == iset_->end()) {  // not excluded?
@@ -1729,6 +1813,7 @@
   // Try to replace outside uses with the last value.
   if (induction_range_.CanGenerateLastValue(instruction)) {
     HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
+    // Deal with regular uses.
     const HUseList<HInstruction*>& uses = instruction->GetUses();
     for (auto it = uses.begin(), end = uses.end(); it != end;) {
       HInstruction* user = it->GetUser();
@@ -1744,6 +1829,7 @@
         induction_range_.Replace(user, instruction, replacement);  // update induction
       }
     }
+    // Deal with environment uses.
     const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
     for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
       HEnvironment* user = it->GetUser();
@@ -1759,7 +1845,6 @@
         }
       }
     }
-    induction_simplication_count_++;
     return true;
   }
   return false;
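
HasReductionFormat() above only accepts cycles where the phi feeds straight back into an add, a subtract on the left operand (x = .. - x is not a plain accumulation), or a min/max intrinsic. A rough sketch of the same shape test on a toy instruction record, with hypothetical Kind/Insn types standing in for the ART IR:

    #include <cassert>

    // Hypothetical, simplified instruction record: an operation kind plus two operand ids.
    enum class Kind { kAdd, kSub, kMin, kMax, kMul };
    struct Insn { Kind kind; int lhs; int rhs; };

    // Mirrors the shape test: the phi must be an operand, and for Sub only the left one.
    bool HasReductionFormat(const Insn& update, int phi_id) {
      switch (update.kind) {
        case Kind::kAdd:
        case Kind::kMin:
        case Kind::kMax:
          return update.lhs == phi_id || update.rhs == phi_id;
        case Kind::kSub:
          return update.lhs == phi_id;  // x = x - .., but not x = .. - x
        default:
          return false;                 // e.g. multiplication is not accepted here
      }
    }

    int main() {
      const int phi = 42;
      assert(HasReductionFormat({Kind::kAdd, phi, 7}, phi));   // x = x + ..
      assert(HasReductionFormat({Kind::kSub, phi, 7}, phi));   // x = x - ..
      assert(!HasReductionFormat({Kind::kSub, 7, phi}, phi));  // x = .. - x rejected
      assert(!HasReductionFormat({Kind::kMul, phi, 7}, phi));  // unrecognized form
      return 0;
    }
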
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index de4bd85..49be8a3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -104,18 +104,33 @@
     bool lhs;              // def/use
   };
 
+  //
   // Loop setup and traversal.
+  //
+
   void LocalRun();
   void AddLoop(HLoopInformation* loop_info);
   void RemoveLoop(LoopNode* node);
-  void TraverseLoopsInnerToOuter(LoopNode* node);
 
+  // Traverses all loops inner to outer to perform simplifications and optimizations.
+  // Returns true if loops nested inside the current loop (node) have changed.
+  bool TraverseLoopsInnerToOuter(LoopNode* node);
+
+  //
   // Optimization.
+  //
+
   void SimplifyInduction(LoopNode* node);
   void SimplifyBlocks(LoopNode* node);
-  void OptimizeInnerLoop(LoopNode* node);
 
+  // Performs optimizations specific to an inner loop (empty loop removal,
+  // unrolling, vectorization). Returns true if anything changed.
+  bool OptimizeInnerLoop(LoopNode* node);
+
+  //
   // Vectorization analysis and synthesis.
+  //
+
   bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
   void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
   void GenerateNewLoop(LoopNode* node,
@@ -155,12 +170,20 @@
 
   // Vectorization heuristics.
   bool IsVectorizationProfitable(int64_t trip_count);
-  void SetPeelingCandidate(int64_t trip_count);
+  void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
   uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
 
+  //
   // Helpers.
+  //
+
   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
-  bool TrySetSimpleLoopHeader(HBasicBlock* block);
+  bool TrySetPhiReduction(HPhi* phi);
+
+  // Detects a loop header with a single induction (returned in main_phi), possibly
+  // other phis for reductions, but no other side effects. Returns true on success.
+  bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
+
   bool IsEmptyBody(HBasicBlock* block);
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
@@ -200,10 +223,12 @@
   // Contents reside in phase-local heap memory.
   ArenaSet<HInstruction*>* iset_;
 
-  // Counter that tracks how many induction cycles have been simplified. Useful
-  // to trigger incremental updates of induction variable analysis of outer loops
-  // when the induction of inner loops has changed.
-  uint32_t induction_simplication_count_;
+  // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
+  // (1) reductions in the loop-body are mapped back to their phi definition,
+  // (2) phi definitions are mapped to their initial value (updated during
+  //     code generation to feed the proper values into the new chain).
+  // Contents reside in phase-local heap memory.
+  ArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
 
   // Flag that tracks if any simplifications have occurred.
   bool simplified_;
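
The two-fold reductions_ mapping described above can be pictured with an ordinary map: for a cycle sum_phi = Phi(init, upd) with upd = sum_phi + a[i], entry (1) maps upd back to sum_phi and entry (2) maps sum_phi to init. A tiny illustration with placeholder string names instead of HInstruction pointers:

    #include <cassert>
    #include <map>
    #include <string>

    int main() {
      // Placeholder names; in the pass both keys and values are HInstruction*.
      std::map<std::string, std::string> reductions;
      reductions["upd"] = "sum_phi";   // (1) loop-body reduction mapped back to its phi
      reductions["sum_phi"] = "init";  // (2) phi mapped to its initial (feed) value

      // During code generation the feed value for the phi can be rewritten
      // (e.g. to a value produced by the new loop) without losing the link.
      assert(reductions.at("upd") == "sum_phi");
      assert(reductions.at("sum_phi") == "init");
      return 0;
    }
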
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ff193e9..a6036da 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1774,13 +1774,11 @@
   bool ShouldCompileDexFilesIndividually() const {
     // Compile individually if we are not building an image, not using any compilation, and are
     // using multidex.
-    // This means extract, verify, and quicken will use the individual compilation mode (to reduce
+    // This means extract, verify, and quicken will use the individual compilation mode (to reduce
     // RAM used by the compiler).
-    // TODO: Still do it for app images to get testing coverage. Note that this will generate empty
-    // app images.
     return !IsImage() &&
         dex_files_.size() > 1 &&
-        !CompilerFilter::IsAnyCompilationEnabled(compiler_options_->GetCompilerFilter());
+        !CompilerFilter::IsAotCompilationEnabled(compiler_options_->GetCompilerFilter());
   }
 
   // Set up and create the compiler driver and then invoke it to compile all the dex files.
@@ -1856,6 +1854,16 @@
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
 
+    const bool compile_individually = ShouldCompileDexFilesIndividually();
+    if (compile_individually) {
+      // Set the compiler driver in the callbacks so that we can avoid re-verification. This not
+      // only helps performance but also prevents re-verifying quickened bytecodes; attempting to
+      // verify quickened bytecode causes verification failures.
+      // Only set the compiler driver if we are doing separate compilation, since there is a bit
+      // of overhead when checking whether a class was previously verified.
+      callbacks_->SetDoesClassUnloading(true, driver_.get());
+    }
+
     // Setup vdex for compilation.
     if (!DoEagerUnquickeningOfVdex() && input_vdex_file_ != nullptr) {
       callbacks_->SetVerifierDeps(
@@ -1872,7 +1880,7 @@
       callbacks_->SetVerifierDeps(new verifier::VerifierDeps(dex_files_));
     }
     // Invoke the compilation.
-    if (ShouldCompileDexFilesIndividually()) {
+    if (compile_individually) {
       CompileDexFilesIndividually();
+      // Return a null classloader since we already released it.
       return nullptr;
@@ -2966,6 +2974,8 @@
 
 static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) {
   dex2oat.LoadClassProfileDescriptors();
+  // Keep the class loader that was used for compilation live for the rest of the compilation
+  // process.
   ScopedGlobalRef class_loader(dex2oat.Compile());
 
   if (!dex2oat.WriteOutputFiles()) {
@@ -3014,6 +3024,8 @@
 }
 
 static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) {
+  // Keep the class loader that was used for compilation live for the rest of the compilation
+  // process.
   ScopedGlobalRef class_loader(dex2oat.Compile());
 
   if (!dex2oat.WriteOutputFiles()) {
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 12bceb3..6a9d979 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -41,6 +41,7 @@
 namespace art {
 
 static constexpr size_t kMaxMethodIds = 65535;
+static constexpr bool kDebugArgs = false;
 
 using android::base::StringPrintf;
 
@@ -55,7 +56,7 @@
   }
 
  protected:
-  int GenerateOdexForTestWithStatus(const std::string& dex_location,
+  int GenerateOdexForTestWithStatus(const std::vector<std::string>& dex_locations,
                                     const std::string& odex_location,
                                     CompilerFilter::Filter filter,
                                     std::string* error_msg,
@@ -63,7 +64,10 @@
                                     bool use_fd = false) {
     std::unique_ptr<File> oat_file;
     std::vector<std::string> args;
-    args.push_back("--dex-file=" + dex_location);
+    // Add dex file args.
+    for (const std::string& dex_location : dex_locations) {
+      args.push_back("--dex-file=" + dex_location);
+    }
     if (use_fd) {
       oat_file.reset(OS::CreateEmptyFile(odex_location.c_str()));
       CHECK(oat_file != nullptr) << odex_location;
@@ -93,7 +97,7 @@
                            bool use_fd = false,
                            std::function<void(const OatFile&)> check_oat = [](const OatFile&) {}) {
     std::string error_msg;
-    int status = GenerateOdexForTestWithStatus(dex_location,
+    int status = GenerateOdexForTestWithStatus({dex_location},
                                                odex_location,
                                                filter,
                                                &error_msg,
@@ -187,6 +191,14 @@
     CHECK(android_root != nullptr);
     argv.push_back("--android-root=" + std::string(android_root));
 
+    if (kDebugArgs) {
+      std::string all_args;
+      for (const std::string& arg : argv) {
+        all_args += arg + " ";
+      }
+      LOG(ERROR) << all_args;
+    }
+
     int link[2];
 
     if (pipe(link) == -1) {
@@ -951,7 +963,7 @@
     Copy(GetTestDexFileName(), dex_location);
 
     std::string error_msg;
-    return GenerateOdexForTestWithStatus(dex_location,
+    return GenerateOdexForTestWithStatus({dex_location},
                                          odex_location,
                                          CompilerFilter::kSpeed,
                                          &error_msg,
@@ -1107,4 +1119,72 @@
   RunTest(context.c_str(), expected_classpath_key.c_str(), true);
 }
 
+class Dex2oatDeterminism : public Dex2oatTest {};
+
+TEST_F(Dex2oatDeterminism, UnloadCompile) {
+  if (!kUseReadBarrier &&
+      gc::kCollectorTypeDefault != gc::kCollectorTypeCMS &&
+      gc::kCollectorTypeDefault != gc::kCollectorTypeMS) {
+    LOG(INFO) << "Test requires determinism support.";
+    return;
+  }
+  Runtime* const runtime = Runtime::Current();
+  std::string out_dir = GetScratchDir();
+  const std::string base_oat_name = out_dir + "/base.oat";
+  const std::string base_vdex_name = out_dir + "/base.vdex";
+  const std::string unload_oat_name = out_dir + "/unload.oat";
+  const std::string unload_vdex_name = out_dir + "/unload.vdex";
+  const std::string no_unload_oat_name = out_dir + "/nounload.oat";
+  const std::string no_unload_vdex_name = out_dir + "/nounload.vdex";
+  const std::string app_image_name = out_dir + "/unload.art";
+  std::string error_msg;
+  const std::vector<gc::space::ImageSpace*>& spaces = runtime->GetHeap()->GetBootImageSpaces();
+  ASSERT_GT(spaces.size(), 0u);
+  const std::string image_location = spaces[0]->GetImageLocation();
+  // Without passing in an app image, it will unload in between compilations.
+  const int res = GenerateOdexForTestWithStatus(
+      GetLibCoreDexFileNames(),
+      base_oat_name,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--force-determinism", "--avoid-storing-invocation"});
+  EXPECT_EQ(res, 0);
+  Copy(base_oat_name, unload_oat_name);
+  Copy(base_vdex_name, unload_vdex_name);
+  std::unique_ptr<File> unload_oat(OS::OpenFileForReading(unload_oat_name.c_str()));
+  std::unique_ptr<File> unload_vdex(OS::OpenFileForReading(unload_vdex_name.c_str()));
+  ASSERT_TRUE(unload_oat != nullptr);
+  ASSERT_TRUE(unload_vdex != nullptr);
+  EXPECT_GT(unload_oat->GetLength(), 0u);
+  EXPECT_GT(unload_vdex->GetLength(), 0u);
+  // Regenerate with an app image to disable the dex2oat unloading and verify that the output is
+  // the same.
+  const int res2 = GenerateOdexForTestWithStatus(
+      GetLibCoreDexFileNames(),
+      base_oat_name,
+      CompilerFilter::Filter::kQuicken,
+      &error_msg,
+      {"--force-determinism", "--avoid-storing-invocation", "--app-image-file=" + app_image_name});
+  EXPECT_EQ(res2, 0);
+  Copy(base_oat_name, no_unload_oat_name);
+  Copy(base_vdex_name, no_unload_vdex_name);
+  std::unique_ptr<File> no_unload_oat(OS::OpenFileForReading(no_unload_oat_name.c_str()));
+  std::unique_ptr<File> no_unload_vdex(OS::OpenFileForReading(no_unload_vdex_name.c_str()));
+  ASSERT_TRUE(no_unload_oat != nullptr);
+  ASSERT_TRUE(no_unload_vdex != nullptr);
+  EXPECT_GT(no_unload_oat->GetLength(), 0u);
+  EXPECT_GT(no_unload_vdex->GetLength(), 0u);
+  // Verify that both of the files are the same (odex and vdex).
+  EXPECT_EQ(unload_oat->GetLength(), no_unload_oat->GetLength());
+  EXPECT_EQ(unload_vdex->GetLength(), no_unload_vdex->GetLength());
+  EXPECT_EQ(unload_oat->Compare(no_unload_oat.get()), 0)
+      << unload_oat_name << " " << no_unload_oat_name;
+  EXPECT_EQ(unload_vdex->Compare(no_unload_vdex.get()), 0)
+      << unload_vdex_name << " " << no_unload_vdex_name;
+  // App image file.
+  std::unique_ptr<File> app_image_file(OS::OpenFileForReading(app_image_name.c_str()));
+  ASSERT_TRUE(app_image_file != nullptr);
+  EXPECT_GT(app_image_file->GetLength(), 0u);
+}
+
 }  // namespace art
diff --git a/runtime/aot_class_linker.cc b/runtime/aot_class_linker.cc
index b1bc3f8..9396565 100644
--- a/runtime/aot_class_linker.cc
+++ b/runtime/aot_class_linker.cc
@@ -16,10 +16,12 @@
 
 #include "aot_class_linker.h"
 
+#include "class_reference.h"
+#include "compiler_callbacks.h"
 #include "handle_scope-inl.h"
-#include "mirror/class.h"
-#include "mirror/object-inl.h"
+#include "mirror/class-inl.h"
 #include "runtime.h"
+#include "verifier/verifier_enums.h"
 
 namespace art {
 
@@ -66,4 +68,18 @@
   }
   return success;
 }
+
+verifier::FailureKind AotClassLinker::PerformClassVerification(Thread* self,
+                                                               Handle<mirror::Class> klass,
+                                                               verifier::HardFailLogMode log_level,
+                                                               std::string* error_msg) {
+  Runtime* const runtime = Runtime::Current();
+  CompilerCallbacks* callbacks = runtime->GetCompilerCallbacks();
+  if (callbacks->CanAssumeVerified(ClassReference(&klass->GetDexFile(),
+                                                  klass->GetDexClassDefIndex()))) {
+    return verifier::FailureKind::kNoFailure;
+  }
+  return ClassLinker::PerformClassVerification(self, klass, log_level, error_msg);
+}
+
 }  // namespace art
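
The override above works because class verification is routed through a protected virtual hook (PerformClassVerification, made virtual on ClassLinker later in this change), so the AOT linker can report kNoFailure whenever the compiler callbacks already vouch for the class. A hedged toy version of that pattern, using made-up Linker/Callbacks names rather than the real ART interfaces:

    #include <cassert>
    #include <string>

    enum class FailureKind { kNoFailure, kSoftFailure, kHardFailure };

    struct Callbacks { bool can_assume_verified = false; };

    class Linker {
     public:
      virtual ~Linker() = default;
      FailureKind VerifyClass(const std::string& klass) { return PerformClassVerification(klass); }
     protected:
      virtual FailureKind PerformClassVerification(const std::string& /*klass*/) {
        return FailureKind::kSoftFailure;  // pretend full verification found a soft failure
      }
    };

    class AotLinker : public Linker {
     public:
      explicit AotLinker(Callbacks* cb) : cb_(cb) {}
     protected:
      FailureKind PerformClassVerification(const std::string& klass) override {
        if (cb_->can_assume_verified) {
          return FailureKind::kNoFailure;  // skip re-verification entirely
        }
        return Linker::PerformClassVerification(klass);  // fall back to the normal path
      }
     private:
      Callbacks* cb_;
    };

    int main() {
      Callbacks cb;
      cb.can_assume_verified = true;
      AotLinker aot(&cb);
      assert(aot.VerifyClass("LFoo;") == FailureKind::kNoFailure);
      cb.can_assume_verified = false;
      assert(aot.VerifyClass("LFoo;") == FailureKind::kSoftFailure);
      return 0;
    }
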
diff --git a/runtime/aot_class_linker.h b/runtime/aot_class_linker.h
index 11bea86..927b533 100644
--- a/runtime/aot_class_linker.h
+++ b/runtime/aot_class_linker.h
@@ -27,6 +27,16 @@
   explicit AotClassLinker(InternTable *intern_table);
   ~AotClassLinker();
 
+ protected:
+  // Overridden version of PerformClassVerification allows skipping verification if the class was
+  // previously verified but unloaded.
+  verifier::FailureKind PerformClassVerification(Thread* self,
+                                                 Handle<mirror::Class> klass,
+                                                 verifier::HardFailLogMode log_level,
+                                                 std::string* error_msg)
+      OVERRIDE
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   bool InitializeClass(Thread *self,
                        Handle<mirror::Class> klass,
                        bool can_run_clinit,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 3ac87c5..9756f57 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3694,7 +3694,8 @@
 
 ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& dex_file) {
   ReaderMutexLock mu(self, *Locks::dex_lock_);
-  ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, FindDexCacheDataLocked(dex_file));
+  DexCacheData dex_cache_data = FindDexCacheDataLocked(dex_file);
+  ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, dex_cache_data);
   if (dex_cache != nullptr) {
     return dex_cache;
   }
@@ -3704,7 +3705,8 @@
       LOG(FATAL_WITHOUT_ABORT) << "Registered dex file " << data.dex_file->GetLocation();
     }
   }
-  LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation();
+  LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation()
+             << " " << &dex_file << " " << dex_cache_data.dex_file;
   UNREACHABLE();
 }
 
@@ -4280,13 +4282,7 @@
   std::string error_msg;
   verifier::FailureKind verifier_failure = verifier::FailureKind::kNoFailure;
   if (!preverified) {
-    Runtime* runtime = Runtime::Current();
-    verifier_failure = verifier::MethodVerifier::VerifyClass(self,
-                                                             klass.Get(),
-                                                             runtime->GetCompilerCallbacks(),
-                                                             runtime->IsAotCompiler(),
-                                                             log_level,
-                                                             &error_msg);
+    verifier_failure = PerformClassVerification(self, klass, log_level, &error_msg);
   }
 
   // Verification is done, grab the lock again.
@@ -4354,6 +4350,19 @@
   return verifier_failure;
 }
 
+verifier::FailureKind ClassLinker::PerformClassVerification(Thread* self,
+                                                            Handle<mirror::Class> klass,
+                                                            verifier::HardFailLogMode log_level,
+                                                            std::string* error_msg) {
+  Runtime* const runtime = Runtime::Current();
+  return verifier::MethodVerifier::VerifyClass(self,
+                                               klass.Get(),
+                                               runtime->GetCompilerCallbacks(),
+                                               runtime->IsAotCompiler(),
+                                               log_level,
+                                               error_msg);
+}
+
 bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file,
                                           ObjPtr<mirror::Class> klass,
                                           mirror::Class::Status& oat_file_class_status) {
@@ -7119,7 +7128,7 @@
                                                                       method_alignment_);
   const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
   auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      Runtime::Current()->GetLinearAlloc()->Realloc(
+      class_linker_->GetAllocatorForClassLoader(klass_->GetClassLoader())->Realloc(
           self_, old_methods, old_methods_ptr_size, new_size));
   CHECK(methods != nullptr);  // Native allocation failure aborts.
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index bf14aeb..2fbbe79 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -710,6 +710,12 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_);
 
+  virtual verifier::FailureKind PerformClassVerification(Thread* self,
+                                                         Handle<mirror::Class> klass,
+                                                         verifier::HardFailLogMode log_level,
+                                                         std::string* error_msg)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   class LinkInterfaceMethodsHelper;
 
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 806653a..c51bb5e 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -22,6 +22,8 @@
 
 namespace art {
 
+class CompilerDriver;
+
 namespace verifier {
 
 class MethodVerifier;
@@ -49,6 +51,13 @@
   virtual verifier::VerifierDeps* GetVerifierDeps() const = 0;
   virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {}
 
+  virtual bool CanAssumeVerified(ClassReference ref ATTRIBUTE_UNUSED) {
+    return false;
+  }
+
+  virtual void SetDoesClassUnloading(bool does_class_unloading ATTRIBUTE_UNUSED,
+                                     CompilerDriver* compiler_driver ATTRIBUTE_UNUSED) {}
+
   bool IsBootImage() {
     return mode_ == CallbackMode::kCompileBootImage;
   }
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 726bddd..b298db6 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -301,6 +301,7 @@
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj,
                                               uint32_t offset) {
+  // Used only in connection with non-volatile loads.
   DCHECK(kEmitCompilerReadBarrier);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
   mirror::HeapReference<mirror::Object>* ref_addr =
@@ -308,9 +309,10 @@
   constexpr ReadBarrierOption kReadBarrierOption =
       kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
   mirror::Object* result =
-      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj,
-                                                               MemberOffset(offset),
-                                                               ref_addr);
+      ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kReadBarrierOption>(
+        obj,
+        MemberOffset(offset),
+        ref_addr);
   return result;
 }
 
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index fd0cd5f..7d01af0 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -79,8 +79,7 @@
 static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
   char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue();
 
-  mirror::HeapReference<mirror::Class> cls =
-      mirror::HeapReference<mirror::Class>::FromMirrorPtr(nullptr);
+  mirror::HeapReference<mirror::Class> cls;
   ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls));
   CHECK_NE(-1, rc);
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 2901995..1b3d0da 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -115,8 +115,8 @@
   }
 
  private:
-  template<bool kPoisonReferences>
-  void MarkReference(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) const
+  template<typename CompressedReferenceType>
+  void MarkReference(CompressedReferenceType* obj_ptr) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
     mirror::Object* ref = obj_ptr->AsMirrorPtr();
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 92b4360..280c0b1 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -48,16 +48,21 @@
   CHECK(mem_map != nullptr);
   uintptr_t* bitmap_begin = reinterpret_cast<uintptr_t*>(mem_map->Begin());
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
-  return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin);
+  return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin, heap_capacity);
 }
 
 template<size_t kAlignment>
-SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin,
-                                     size_t bitmap_size, const void* heap_begin)
+SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name,
+                                     MemMap* mem_map,
+                                     uintptr_t* bitmap_begin,
+                                     size_t bitmap_size,
+                                     const void* heap_begin,
+                                     size_t heap_capacity)
     : mem_map_(mem_map),
       bitmap_begin_(reinterpret_cast<Atomic<uintptr_t>*>(bitmap_begin)),
       bitmap_size_(bitmap_size),
       heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)),
+      heap_limit_(reinterpret_cast<uintptr_t>(heap_begin) + heap_capacity),
       name_(name) {
   CHECK(bitmap_begin_ != nullptr);
   CHECK_NE(bitmap_size, 0U);
@@ -89,6 +94,7 @@
   if (new_size < bitmap_size_) {
     bitmap_size_ = new_size;
   }
+  heap_limit_ = new_end;
   // Not sure if doing this trim is necessary, since nothing past the end of the heap capacity
   // should be marked.
 }
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 2fe6394..b49e0b7 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -163,6 +163,7 @@
 
   void SetHeapSize(size_t bytes) {
     // TODO: Un-map the end of the mem map.
+    heap_limit_ = heap_begin_ + bytes;
     bitmap_size_ = OffsetToIndex(bytes) * sizeof(intptr_t);
     CHECK_EQ(HeapSize(), bytes);
   }
@@ -173,7 +174,7 @@
 
   // The maximum address which the bitmap can span. (HeapBegin() <= object < HeapLimit()).
   uint64_t HeapLimit() const {
-    return static_cast<uint64_t>(HeapBegin()) + HeapSize();
+    return heap_limit_;
   }
 
   // Set the max address which can covered by the bitmap.
@@ -196,8 +197,12 @@
  private:
   // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1,
   // however, we document that this is expected on heap_end_
-  SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size,
-              const void* heap_begin);
+  SpaceBitmap(const std::string& name,
+              MemMap* mem_map,
+              uintptr_t* bitmap_begin,
+              size_t bitmap_size,
+              const void* heap_begin,
+              size_t heap_capacity);
 
   template<bool kSetBit>
   bool Modify(const mirror::Object* obj);
@@ -211,10 +216,13 @@
   // Size of this bitmap.
   size_t bitmap_size_;
 
-  // The base address of the heap, which corresponds to the word containing the first bit in the
-  // bitmap.
+  // The start address of the memory covered by the bitmap, which corresponds to the word
+  // containing the first bit in the bitmap.
   const uintptr_t heap_begin_;
 
+  // The end address of the memory covered by the bitmap. This may not be on a word boundary.
+  uintptr_t heap_limit_;
+
   // Name of this bitmap.
   std::string name_;
 };
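
With heap_limit_ stored explicitly, HeapLimit() now reports the capacity the bitmap was created for rather than a value derived from the rounded-up bitmap word count. A rough sketch of the intended post-construction invariants, assuming the existing Create() factory (ContinuousSpaceBitmap is the object-aligned instantiation; the exact rounding of HeapSize() is alignment-dependent):

  uint8_t* const heap_begin = reinterpret_cast<uint8_t*>(0x10000000);  // example address
  const size_t capacity = 16 * MB;
  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
      accounting::ContinuousSpaceBitmap::Create("example bitmap", heap_begin, capacity));
  CHECK_EQ(bitmap->HeapBegin(), reinterpret_cast<uintptr_t>(heap_begin));
  CHECK_EQ(bitmap->HeapLimit(), bitmap->HeapBegin() + capacity);
  CHECK_LE(capacity, bitmap->HeapSize());  // the bitmap words may cover slightly more
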
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 9b24885..52b355d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2464,6 +2464,7 @@
 }
 
 bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) {
+  // TODO: Explain why this is here. What release operation does it pair with?
   QuasiAtomic::ThreadFenceAcquire();
   accounting::ObjectStack* alloc_stack = GetAllocationStack();
   return alloc_stack->Contains(ref);
@@ -2624,9 +2625,8 @@
         }
       } while (!field->CasWeakRelaxed(from_ref, to_ref));
     } else {
-      QuasiAtomic::ThreadFenceRelease();
-      field->Assign(to_ref);
-      QuasiAtomic::ThreadFenceSequentiallyConsistent();
+      // TODO: Why is this seq_cst when the above is relaxed? Document memory ordering.
+      field->Assign</* kIsVolatile */ true>(to_ref);
     }
   }
   return true;
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index dec206b..f722e8d 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -113,7 +113,7 @@
   virtual mirror::Object* IsMarked(mirror::Object* obj)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   // Returns true if the given heap reference is null or is already marked. If it's already marked,
-  // update the reference (uses a CAS if do_atomic_update is true. Otherwise, returns false.
+  // update the reference (uses a CAS if do_atomic_update is true). Otherwise, returns false.
   virtual bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                            bool do_atomic_update)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 78fb2d2..7db5d2c 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -38,9 +38,8 @@
 // Used to mark and copy objects. Any newly-marked objects who are in the from space Get moved to
 // the to-space and have their forward address updated. Objects which have been newly marked are
 // pushed on the mark stack.
-template<bool kPoisonReferences>
-inline void SemiSpace::MarkObject(
-    mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) {
+template<typename CompressedReferenceType>
+inline void SemiSpace::MarkObject(CompressedReferenceType* obj_ptr) {
   mirror::Object* obj = obj_ptr->AsMirrorPtr();
   if (obj == nullptr) {
     return;
@@ -73,9 +72,8 @@
   }
 }
 
-template<bool kPoisonReferences>
-inline void SemiSpace::MarkObjectIfNotInToSpace(
-    mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) {
+template<typename CompressedReferenceType>
+inline void SemiSpace::MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) {
   if (!to_space_->HasAddress(obj_ptr->AsMirrorPtr())) {
     MarkObject(obj_ptr);
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index fd52da3..6d4d789 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -97,13 +97,13 @@
   // Find the default mark bitmap.
   void FindDefaultMarkBitmap();
 
-  // Updates obj_ptr if the object has moved.
-  template<bool kPoisonReferences>
-  void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
+  // Updates obj_ptr if the object has moved. Takes either an ObjectReference or a HeapReference.
+  template<typename CompressedReferenceType>
+  void MarkObject(CompressedReferenceType* obj_ptr)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template<bool kPoisonReferences>
-  void MarkObjectIfNotInToSpace(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
+  template<typename CompressedReferenceType>
+  void MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE
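
MarkObject() and MarkObjectIfNotInToSpace() (and MarkReference() in the mod-union table above) are now duck-typed on the compressed reference rather than on ObjectReference's kPoisonReferences flag, which lets them keep accepting both root references and heap field references once HeapReference stops deriving from ObjectReference (see the object_reference.h change below). A minimal illustration of the contract, not ART code:

  // Any compressed reference type that exposes AsMirrorPtr() satisfies the template.
  template <typename CompressedReferenceType>
  bool IsNullReference(CompressedReferenceType* ref) {
    return ref->AsMirrorPtr() == nullptr;
  }
  // Usable with mirror::ObjectReference<kPoisonReferences, mirror::Object>* (e.g.
  // mirror::CompressedReference roots) and with mirror::HeapReference<mirror::Object>*
  // (object fields), even though the two no longer share a base class.
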
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 79b775a..9baa016 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -38,12 +38,19 @@
   Thread* const self = Thread::Current();
   for (size_t i = 0; i < 2; ++i) {
     LargeObjectSpace* los = nullptr;
+    const size_t capacity = 128 * MB;
     if (i == 0) {
       los = space::LargeObjectMapSpace::Create("large object space");
     } else {
-      los = space::FreeListSpace::Create("large object space", nullptr, 128 * MB);
+      los = space::FreeListSpace::Create("large object space", nullptr, capacity);
     }
 
+    // Make sure the bitmap is not empty and actually covers at least the capacity we expect.
+    CHECK_LT(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin()),
+             static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit()));
+    CHECK_LE(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin() + capacity),
+             static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit()));
+
     static const size_t num_allocations = 64;
     static const size_t max_allocation_size = 0x100000;
     std::vector<std::pair<mirror::Object*, size_t>> requests;
diff --git a/runtime/globals.h b/runtime/globals.h
index 6164225..256306d 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -51,11 +51,17 @@
 static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
 
+// CLion, the clang analyzer, etc. can falsely believe that "if (kIsDebugBuild)" always
+// takes the same branch. By wrapping the constant in a call to another constexpr function,
+// we force them to realize that it is not actually always evaluating to the same value.
+static constexpr bool GlobalsReturnSelf(bool self) { return self; }
+
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
-#if defined(NDEBUG)
-static constexpr bool kIsDebugBuild = false;
+// TODO: Use only __clang_analyzer__ here. b/64455231
+#if defined(NDEBUG) && !defined(__CLION_IDE__)
+static constexpr bool kIsDebugBuild = GlobalsReturnSelf(false);
 #else
-static constexpr bool kIsDebugBuild = true;
+static constexpr bool kIsDebugBuild = GlobalsReturnSelf(true);
 #endif
 
 // ART_TARGET - Defined for target builds of ART.
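
The wrapper works because the analyzer gives up on constant propagation across the call, while the compiler still folds the constexpr call, so generated code is unchanged. A standalone sketch of the same pattern outside ART (whether a given analyzer is actually silenced is the assumption made by the comment above):

  constexpr bool Identity(bool value) { return value; }
  constexpr bool kFlag = Identity(false);

  int Example(int x) {
    if (kFlag) {  // analyzers no longer treat this as a provably dead branch
      return x + 1;
    }
    return x;
  }
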
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 2c72821..ce06a03 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1438,7 +1438,11 @@
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
+    ReadBarrier::Barrier<
+        mirror::Object,
+        /* kIsVolatile */ false,
+        kWithReadBarrier,
+        /* kAlwaysUpdateField */ true>(
         obj,
         MemberOffset(offset),
         field_addr);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 59373eb..31319f1 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1315,9 +1315,7 @@
     current_capacity_ = max_capacity_;
   }
 
-  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-    LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
-  }
+  VLOG(jit) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
 
   SetFootprintLimit(current_capacity_);
 
@@ -1388,21 +1386,17 @@
       do_full_collection = ShouldDoFullCollection();
     }
 
-    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-      LOG(INFO) << "Do "
-                << (do_full_collection ? "full" : "partial")
-                << " code cache collection, code="
-                << PrettySize(CodeCacheSize())
-                << ", data=" << PrettySize(DataCacheSize());
-    }
+    VLOG(jit) << "Do "
+              << (do_full_collection ? "full" : "partial")
+              << " code cache collection, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
 
     DoCollection(self, /* collect_profiling_info */ do_full_collection);
 
-    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-      LOG(INFO) << "After code cache collection, code="
-                << PrettySize(CodeCacheSize())
-                << ", data=" << PrettySize(DataCacheSize());
-    }
+    VLOG(jit) << "After code cache collection, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
 
     {
       MutexLock mu(self, lock_);
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 4f8952d..6eb200d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -716,11 +716,10 @@
   }
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
-  T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
-  if (kIsVolatile) {
-    // TODO: Refactor to use a SequentiallyConsistent load instead.
-    QuasiAtomic::ThreadFenceAcquire();  // Ensure visibility of operations preceding store.
-  }
+  T* result = ReadBarrier::Barrier<T, kIsVolatile, kReadBarrierOption>(
+      this,
+      field_offset,
+      objref_addr);
   if (kVerifyFlags & kVerifyReads) {
     VerifyObject(result);
   }
@@ -756,15 +755,7 @@
   }
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
-  if (kIsVolatile) {
-    // TODO: Refactor to use a SequentiallyConsistent store instead.
-    QuasiAtomic::ThreadFenceRelease();  // Ensure that prior accesses are visible before store.
-    objref_addr->Assign(new_value.Ptr());
-    QuasiAtomic::ThreadFenceSequentiallyConsistent();
-                                // Ensure this store occurs before any volatile loads.
-  } else {
-    objref_addr->Assign(new_value.Ptr());
-  }
+  objref_addr->Assign<kIsVolatile>(new_value.Ptr());
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
@@ -835,13 +826,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_,
-                                                                        new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref, new_ref);
   return success;
 }
 
@@ -877,13 +867,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_,
-                                                                          new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
   return success;
 }
 
@@ -907,13 +896,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_,
-                                                         new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref, new_ref);
   return success;
 }
 
@@ -937,13 +925,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_,
-                                                         new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref, new_ref);
   return success;
 }
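
The net effect of the object-inl.h changes is that the hand-rolled fence sequences around volatile reference loads and stores collapse into a single load/store on the Atomic<uint32_t> now embedded in HeapReference. A rough equivalence sketch in standard C++ atomics (an assumption about how ART's StoreSequentiallyConsistent/LoadSequentiallyConsistent map onto std::atomic, not ART code):

  #include <atomic>
  #include <cstdint>

  std::atomic<uint32_t> reference;  // stands in for HeapReference<T>::reference_

  void VolatileStore(uint32_t compressed) {
    // Previously: release fence, plain store, seq_cst fence.
    reference.store(compressed, std::memory_order_seq_cst);
  }

  uint32_t VolatileLoad() {
    // Previously: plain load followed by an acquire fence.
    return reference.load(std::memory_order_seq_cst);
  }
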
 
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
index 69365af..f076940 100644
--- a/runtime/mirror/object-readbarrier-inl.h
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -211,13 +211,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
-                                                           new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref, new_ref);
   return success;
 }
 
@@ -241,13 +240,12 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value));
+  uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
-  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_,
-                                                           new_ref.reference_);
+  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref, new_ref);
   return success;
 }
 
diff --git a/runtime/mirror/object_reference-inl.h b/runtime/mirror/object_reference-inl.h
index 22fb83c..60f3ce1 100644
--- a/runtime/mirror/object_reference-inl.h
+++ b/runtime/mirror/object_reference-inl.h
@@ -30,17 +30,10 @@
 }
 
 template<class MirrorType>
-HeapReference<MirrorType> HeapReference<MirrorType>::FromObjPtr(ObjPtr<MirrorType> ptr) {
-  return HeapReference<MirrorType>(ptr.Ptr());
-}
-
-template<class MirrorType>
 bool HeapReference<MirrorType>::CasWeakRelaxed(MirrorType* expected_ptr, MirrorType* new_ptr) {
-  HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_ptr));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_ptr));
-  Atomic<uint32_t>* atomic_reference = reinterpret_cast<Atomic<uint32_t>*>(&this->reference_);
-  return atomic_reference->CompareExchangeWeakRelaxed(expected_ref.reference_,
-                                                      new_ref.reference_);
+  return reference_.CompareExchangeWeakRelaxed(
+      Compression::Compress(expected_ptr),
+      Compression::Compress(new_ptr));
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index a96a120..c62ee6c 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 
+#include "atomic.h"
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
 #include "obj_ptr.h"
@@ -30,20 +31,43 @@
 // extra platform specific padding.
 #define MANAGED PACKED(4)
 
+template<bool kPoisonReferences, class MirrorType>
+class PtrCompression {
+ public:
+  // Compress reference to its bit representation.
+  static uint32_t Compress(MirrorType* mirror_ptr) {
+    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
+    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
+  }
+
+  // Uncompress an encoded reference from its bit representation.
+  static MirrorType* Decompress(uint32_t ref) {
+    uintptr_t as_bits = kPoisonReferences ? -ref : ref;
+    return reinterpret_cast<MirrorType*>(as_bits);
+  }
+
+  // Convert an ObjPtr to a compressed reference.
+  static uint32_t Compress(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Compress(ptr.Ptr());
+  }
+};
+
 // Value type representing a reference to a mirror::Object of type MirrorType.
 template<bool kPoisonReferences, class MirrorType>
 class MANAGED ObjectReference {
+ private:
+  using Compression = PtrCompression<kPoisonReferences, MirrorType>;
+
  public:
-  MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    return UnCompress();
+  MirrorType* AsMirrorPtr() const {
+    return Compression::Decompress(reference_);
   }
 
-  void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
-    reference_ = Compress(other);
+  void Assign(MirrorType* other) {
+    reference_ = Compression::Compress(other);
   }
 
-  void Assign(ObjPtr<MirrorType> ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void Clear() {
     reference_ = 0;
@@ -58,48 +82,71 @@
     return reference_;
   }
 
+  static ObjectReference<kPoisonReferences, MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return ObjectReference<kPoisonReferences, MirrorType>(mirror_ptr);
+  }
+
  protected:
-  explicit ObjectReference(MirrorType* mirror_ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      : reference_(Compress(mirror_ptr)) {
+  explicit ObjectReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(Compression::Compress(mirror_ptr)) {
   }
 
-  // Compress reference to its bit representation.
-  static uint32_t Compress(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) {
-    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
-    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
-  }
-
-  // Uncompress an encoded reference from its bit representation.
-  MirrorType* UnCompress() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_;
-    return reinterpret_cast<MirrorType*>(as_bits);
-  }
-
-  friend class Object;
-
   // The encoded reference to a mirror::Object.
   uint32_t reference_;
 };
 
 // References between objects within the managed heap.
+// Similar API to ObjectReference, but not a value type. Supports atomic access.
 template<class MirrorType>
-class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> {
+class MANAGED HeapReference {
+ private:
+  using Compression = PtrCompression<kPoisonHeapReferences, MirrorType>;
+
  public:
+  HeapReference() REQUIRES_SHARED(Locks::mutator_lock_) : HeapReference(nullptr) {}
+
+  template <bool kIsVolatile = false>
+  MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Compression::Decompress(
+        kIsVolatile ? reference_.LoadSequentiallyConsistent() : reference_.LoadJavaData());
+  }
+
+  template <bool kIsVolatile = false>
+  void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kIsVolatile) {
+      reference_.StoreSequentiallyConsistent(Compression::Compress(other));
+    } else {
+      reference_.StoreJavaData(Compression::Compress(other));
+    }
+  }
+
+  template <bool kIsVolatile = false>
+  void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void Clear() {
+    reference_.StoreJavaData(0);
+    DCHECK(IsNull());
+  }
+
+  bool IsNull() const {
+    return reference_.LoadJavaData() == 0;
+  }
+
   static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     return HeapReference<MirrorType>(mirror_ptr);
   }
 
-  static HeapReference<MirrorType> FromObjPtr(ObjPtr<MirrorType> ptr)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   bool CasWeakRelaxed(MirrorType* old_ptr, MirrorType* new_ptr)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   explicit HeapReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_)
-      : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
+      : reference_(Compression::Compress(mirror_ptr)) {}
+
+  // The encoded reference to a mirror::Object. Atomically updateable.
+  Atomic<uint32_t> reference_;
 };
 
 static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize,
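
PtrCompression centralizes the 32-bit encoding that ObjectReference and the new HeapReference share: references are truncated to 32 bits and, when kPoisonReferences is set, stored negated so that accidentally dereferencing an encoded value faults. A self-contained round-trip sketch of the scheme (illustrative; it is lossless only because the managed heap lives in the low 4 GiB):

  #include <cstdint>

  template <bool kPoison>
  uint32_t Compress(void* ptr) {
    uintptr_t bits = reinterpret_cast<uintptr_t>(ptr);
    return static_cast<uint32_t>(kPoison ? -bits : bits);
  }

  template <bool kPoison>
  void* Decompress(uint32_t ref) {
    uintptr_t bits = kPoison ? -ref : ref;  // unsigned negation is its own inverse
    return reinterpret_cast<void*>(bits);
  }
  // For any pointer value below 2^32, Decompress<P>(Compress<P>(p)) == p for either P.
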
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 761362f..b2bdeed 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -67,10 +67,12 @@
   if (kUseReadBarrier) {
     // Need to make sure the reference stored in the field is a to-space one before attempting the
     // CAS or the CAS could fail incorrectly.
+    // Note that the read barrier load does NOT need to be volatile.
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj.Ptr()) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
+    ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kWithReadBarrier,
+        /* kAlwaysUpdateField */ true>(
         obj.Ptr(),
         MemberOffset(offset),
         field_addr);
@@ -112,6 +114,7 @@
                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj);
+  // TODO: A release store is likely to be faster on future processors.
   QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField32<false>(MemberOffset(offset), newValue);
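
The TODO refers to the ordered-store path shown here: today it issues a release fence followed by a plain store, and the TODO suggests a single release store would express the same intent and be cheaper on processors where standalone fences are expensive. A rough sketch of that alternative in standard atomics (illustrative only, not the ART API):

  #include <atomic>
  #include <cstdint>

  std::atomic<int32_t> field;

  void OrderedStore(int32_t new_value) {
    // Same intent as "ThreadFenceRelease(); plain store;" for an ordered write.
    field.store(new_value, std::memory_order_release);
  }
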
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index db10103..b592247 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -533,21 +533,19 @@
 void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
   DCHECK(is_deoptimization_);
 
-  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
-    LOG(INFO) << "Single-frame deopting:";
-    DumpFramesWithType(self_, true);
-  }
-
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
   // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  LOG(INFO) << "Deoptimizing "
-            << deopt_method->PrettyMethod()
-            << " due to "
-            << GetDeoptimizationKindName(kind);
+  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
+    LOG(INFO) << "Single-frame deopting: "
+              << deopt_method->PrettyMethod()
+              << " due to "
+              << GetDeoptimizationKindName(kind);
+    DumpFramesWithType(self_, /* details */ true);
+  }
   if (Runtime::Current()->UseJitCompilation()) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index b0935c0..6424599 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -33,7 +33,8 @@
 // Disabled for performance reasons.
 static constexpr bool kCheckDebugDisallowReadBarrierCount = false;
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField>
+template <typename MirrorType, bool kIsVolatile, ReadBarrierOption kReadBarrierOption,
+          bool kAlwaysUpdateField>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
@@ -55,7 +56,7 @@
       }
       ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
           fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr));
-      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>();
       MirrorType* old_ref = ref;
       if (is_gray) {
         // Slow-path.
@@ -71,9 +72,9 @@
       return ref;
     } else if (kUseBrooksReadBarrier) {
       // To be implemented.
-      return ref_addr->AsMirrorPtr();
+      return ref_addr->template AsMirrorPtr<kIsVolatile>();
     } else if (kUseTableLookupReadBarrier) {
-      MirrorType* ref = ref_addr->AsMirrorPtr();
+      MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>();
       MirrorType* old_ref = ref;
       // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
       gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -93,7 +94,7 @@
     }
   } else {
     // No read barrier.
-    return ref_addr->AsMirrorPtr();
+    return ref_addr->template AsMirrorPtr<kIsVolatile>();
   }
 }
 
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index d36acbc..8a106aa 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -46,9 +46,13 @@
   // fast-debug environment.
   DECLARE_RUNTIME_DEBUG_FLAG(kEnableReadBarrierInvariantChecks);
 
+  // Return the reference at ref_addr, invoking the read barrier as appropriate.
+  // ref_addr is an address within obj.
   // It's up to the implementation whether the given field gets updated whereas the return value
   // must be an updated reference unless kAlwaysUpdateField is true.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+  template <typename MirrorType,
+            bool kIsVolatile,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             bool kAlwaysUpdateField = false>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 19df0d2..2e06536 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -634,7 +634,7 @@
 void StackVisitor::SanityCheckFrame() const {
   if (kIsDebugBuild) {
     ArtMethod* method = GetMethod();
-    auto* declaring_class = method->GetDeclaringClass();
+    mirror::Class* declaring_class = method->GetDeclaringClass();
     // Runtime methods have null declaring class.
     if (!method->IsRuntimeMethod()) {
       CHECK(declaring_class != nullptr);
@@ -647,11 +647,14 @@
     LinearAlloc* const linear_alloc = runtime->GetLinearAlloc();
     if (!linear_alloc->Contains(method)) {
       // Check class linker linear allocs.
-      mirror::Class* klass = method->GetDeclaringClass();
+      // We use the canonical method, as copied methods may have a declaring
+      // class from another class loader.
+      ArtMethod* canonical = method->GetCanonicalMethod();
+      mirror::Class* klass = canonical->GetDeclaringClass();
       LinearAlloc* const class_linear_alloc = (klass != nullptr)
           ? runtime->GetClassLinker()->GetAllocatorForClassLoader(klass->GetClassLoader())
           : linear_alloc;
-      if (!class_linear_alloc->Contains(method)) {
+      if (!class_linear_alloc->Contains(canonical)) {
         // Check image space.
         bool in_image = false;
         for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) {
@@ -660,14 +663,14 @@
             const auto& header = image_space->GetImageHeader();
             const ImageSection& methods = header.GetMethodsSection();
             const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
-            const size_t offset =  reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+            const size_t offset =  reinterpret_cast<const uint8_t*>(canonical) - image_space->Begin();
             if (methods.Contains(offset) || runtime_methods.Contains(offset)) {
               in_image = true;
               break;
             }
           }
         }
-        CHECK(in_image) << method->PrettyMethod() << " not in linear alloc or image";
+        CHECK(in_image) << canonical->PrettyMethod() << " not in linear alloc or image";
       }
     }
     if (cur_quick_frame_ != nullptr) {
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 5103540..f466eea 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -876,6 +876,91 @@
     return true;
   }
 
+  /// CHECK-START: void Main.modArrayIndex1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex1(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex1(int[] array) {
+    for(int i = 0; i < 100; i++) {
+      // Cannot be statically eliminated, for example, when array.length == 5.
+      // Currently dynamic BCE isn't applied for this case.
+      array[i % 10] = i;
+      // Can be eliminated by BCE.
+      array[i % array.length] = i;
+    }
+  }
+
+  /// CHECK-START: void Main.modArrayIndex2(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex2(int[], int) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex2(int array[], int index) {
+    for(int i = 0; i < 100; i++) {
+      // Neither bounds check can be statically eliminated, because index can be < 0.
+      // Currently dynamic BCE isn't applied for this case.
+      array[(index+i) % 10] = i;
+      array[(index+i) % array.length] = i;
+    }
+  }
+
+  static final int[] staticArray = new int[10];
+
+  /// CHECK-START: void Main.modArrayIndex3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex3() BCE (after)
+  /// CHECK-NOT: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex3() {
+    for(int i = 0; i < 100; i++) {
+      // Currently dynamic BCE isn't applied for this case.
+      staticArray[i % 10] = i;
+      // Can be eliminated by BCE.
+      staticArray[i % staticArray.length] = i;
+    }
+  }
+
+  /// CHECK-START: void Main.modArrayIndex4() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.modArrayIndex4() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArraySet
+  public static void modArrayIndex4() {
+    int[] array = new int[20];
+    for(int i = 0; i < 100; i++) {
+      // The local array length is statically known. Both can be eliminated by BCE.
+      array[i % 10] = i;
+      array[i % array.length] = i;
+    }
+  }
 
   /// CHECK-START: void Main.bubbleSort(int[]) GVN (before)
   /// CHECK: BoundsCheck
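
The reasoning these checker tests encode, spelled out: inside the loops i >= 0, and evaluating i % array.length without an ArithmeticException implies array.length > 0, so Java's remainder gives 0 <= i % array.length < array.length and the following ArraySet needs no bounds check. By contrast, i % 10 only guarantees a value in [0, 10), which is out of range whenever array.length < 10, and in modArrayIndex2 the dividend index + i can be negative, making the remainder negative, so those BoundsCheck instructions have to stay.
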
diff --git a/test/661-checker-simd-reduc/expected.txt b/test/661-checker-simd-reduc/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/661-checker-simd-reduc/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/661-checker-simd-reduc/info.txt b/test/661-checker-simd-reduc/info.txt
new file mode 100644
index 0000000..4d071fb
--- /dev/null
+++ b/test/661-checker-simd-reduc/info.txt
@@ -0,0 +1 @@
+Functional tests on vectorization of the most basic reductions.
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
new file mode 100644
index 0000000..741b5fa
--- /dev/null
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for simple integral reductions: same type for accumulator and data.
+ */
+public class Main {
+
+  static final int N = 500;
+
+  //
+  // Basic reductions in loops.
+  //
+
+  // TODO: vectorize these (second step of b/64091002 plan)
+
+  private static byte reductionByte(byte[] x) {
+    byte sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static short reductionShort(short[] x) {
+    short sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static char reductionChar(char[] x) {
+    char sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionInt(int[] x) {
+    int sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static long reductionLong(long[] x) {
+    long sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static byte reductionMinusByte(byte[] x) {
+    byte sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static short reductionMinusShort(short[] x) {
+    short sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static char reductionMinusChar(char[] x) {
+    char sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinusInt(int[] x) {
+    int sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static long reductionMinusLong(long[] x) {
+    long sum = 0;
+    for (int i = 0; i < x.length; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static byte reductionMinByte(byte[] x) {
+    byte min = Byte.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (byte) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static short reductionMinShort(short[] x) {
+    short min = Short.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (short) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static char reductionMinChar(char[] x) {
+    char min = Character.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = (char) Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static int reductionMinInt(int[] x) {
+    int min = Integer.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static long reductionMinLong(long[] x) {
+    long min = Long.MAX_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static byte reductionMaxByte(byte[] x) {
+    byte max = Byte.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (byte) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static short reductionMaxShort(short[] x) {
+    short max = Short.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (short) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static char reductionMaxChar(char[] x) {
+    char max = Character.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = (char) Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static int reductionMaxInt(int[] x) {
+    int max = Integer.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  private static long reductionMaxLong(long[] x) {
+    long max = Long.MIN_VALUE;
+    for (int i = 0; i < x.length; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  //
+  // A few special cases.
+  //
+
+  // TODO: consider unrolling
+
+  private static int reductionInt10(int[] x) {
+    int sum = 0;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      sum += x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinusInt10(int[] x) {
+    int sum = 0;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      sum -= x[i];
+    }
+    return sum;
+  }
+
+  private static int reductionMinInt10(int[] x) {
+    int min = Integer.MAX_VALUE;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      min = Math.min(min, x[i]);
+    }
+    return min;
+  }
+
+  private static int reductionMaxInt10(int[] x) {
+    int max = Integer.MIN_VALUE;
+    // Amenable to complete unrolling.
+    for (int i = 10; i <= 10; i++) {
+      max = Math.max(max, x[i]);
+    }
+    return max;
+  }
+
+  //
+  // Main driver.
+  //
+
+  public static void main(String[] args) {
+    byte[] xb = new byte[N];
+    short[] xs = new short[N];
+    char[] xc = new char[N];
+    int[] xi = new int[N];
+    long[] xl = new long[N];
+    for (int i = 0, k = -17; i < N; i++, k += 3) {
+      xb[i] = (byte) k;
+      xs[i] = (short) k;
+      xc[i] = (char) k;
+      xi[i] = k;
+      xl[i] = k;
+    }
+
+    // Test various reductions in loops.
+    expectEquals(-74, reductionByte(xb));
+    expectEquals(-27466, reductionShort(xs));
+    expectEquals(38070, reductionChar(xc));
+    expectEquals(365750, reductionInt(xi));
+    expectEquals(365750L, reductionLong(xl));
+    expectEquals(74, reductionMinusByte(xb));
+    expectEquals(27466, reductionMinusShort(xs));
+    expectEquals(27466, reductionMinusChar(xc));
+    expectEquals(-365750, reductionMinusInt(xi));
+    expectEquals(-365750L, reductionMinusLong(xl));
+    expectEquals(-128, reductionMinByte(xb));
+    expectEquals(-17, reductionMinShort(xs));
+    expectEquals(1, reductionMinChar(xc));
+    expectEquals(-17, reductionMinInt(xi));
+    expectEquals(-17L, reductionMinLong(xl));
+    expectEquals(127, reductionMaxByte(xb));
+    expectEquals(1480, reductionMaxShort(xs));
+    expectEquals(65534, reductionMaxChar(xc));
+    expectEquals(1480, reductionMaxInt(xi));
+    expectEquals(1480L, reductionMaxLong(xl));
+
+    // Test special cases.
+    expectEquals(13, reductionInt10(xi));
+    expectEquals(-13, reductionMinusInt10(xi));
+    expectEquals(13, reductionMinInt10(xi));
+    expectEquals(13, reductionMaxInt10(xi));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
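
A quick check of the expected values against the driver's initialization xi[i] = -17 + 3*i for i = 0..499: the sum is 500*(-17) + 3*(499*500/2) = -8500 + 374250 = 365750, the minimum is -17 (at i = 0), the maximum is -17 + 3*499 = 1480, and reductionInt10 reads only x[10] = -17 + 30 = 13. These match the int and long expectations above; the byte, short, and char variants differ only because both the stored elements and the running sum are truncated to the narrower type.
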
diff --git a/test/661-classloader-allocator/expected.txt b/test/661-classloader-allocator/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/661-classloader-allocator/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/661-classloader-allocator/info.txt b/test/661-classloader-allocator/info.txt
new file mode 100644
index 0000000..872b5e0
--- /dev/null
+++ b/test/661-classloader-allocator/info.txt
@@ -0,0 +1,3 @@
+Regression test for copied methods, which used not to be
+(re-)allocated with the right class loader allocator,
+which crashed the JIT profiler.
diff --git a/test/661-classloader-allocator/src-ex/OtherClass.java b/test/661-classloader-allocator/src-ex/OtherClass.java
new file mode 100644
index 0000000..e59cb95
--- /dev/null
+++ b/test/661-classloader-allocator/src-ex/OtherClass.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+interface Itf {
+  public default int defaultMethod() {
+    return 42;
+  }
+}
+
+public class OtherClass implements Itf {
+  public void foo() {
+  }
+}
diff --git a/test/661-classloader-allocator/src/Main.java b/test/661-classloader-allocator/src/Main.java
new file mode 100644
index 0000000..40f8f7a
--- /dev/null
+++ b/test/661-classloader-allocator/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+
+public class Main {
+  static final String DEX_FILE =
+      System.getenv("DEX_LOCATION") + "/661-classloader-allocator-ex.jar";
+  static final String LIBRARY_SEARCH_PATH = System.getProperty("java.library.path");
+
+  private static void doUnloading() {
+    // Stop the JIT to ensure its threads and work queue are not keeping classes
+    // artificially alive.
+    stopJit();
+    // Do multiple GCs to prevent rare flakiness if some other thread is keeping the
+    // classloader live.
+    for (int i = 0; i < 5; ++i) {
+      Runtime.getRuntime().gc();
+    }
+    startJit();
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    loadClass();
+    doUnloading();
+    // fetchProfiles iterates over the ProfilingInfo objects; we used to crash in the
+    // presence of unloaded copied methods.
+    fetchProfiles();
+  }
+
+  public static void loadClass() throws Exception {
+    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    if (pathClassLoader == null) {
+      throw new AssertionError("Couldn't find path class loader class");
+    }
+    Constructor<?> constructor =
+      pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
+    ClassLoader loader = (ClassLoader) constructor.newInstance(
+      DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+    Class<?> otherClass = loader.loadClass("p1.OtherClass");
+    ensureJitCompiled(otherClass, "foo");
+  }
+
+  public static native void ensureJitCompiled(Class<?> cls, String methodName);
+  public static native void fetchProfiles();
+  public static native void stopJit();
+  public static native void startJit();
+}
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 7c0ed69..60c7315 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -254,4 +254,17 @@
   return Runtime::Current()->GetNumberOfDeoptimizations();
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_fetchProfiles(JNIEnv*, jclass) {
+  jit::Jit* jit = GetJitIfEnabled();
+  if (jit == nullptr) {
+    return;
+  }
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  std::vector<ProfileMethodInfo> unused_vector;
+  std::set<std::string> unused_locations;
+  unused_locations.insert("fake_location");
+  ScopedObjectAccess soa(Thread::Current());
+  code_cache->GetProfiledMethods(unused_locations, unused_vector);
+}
+
 }  // namespace art
diff --git a/tools/art b/tools/art
index dea7ebd..15993dd 100644
--- a/tools/art
+++ b/tools/art
@@ -23,6 +23,7 @@
 LIBART=libart.so
 JIT_PROFILE="no"
 VERBOSE="no"
+CLEAN_OAT_FILES="yes"
 EXTRA_OPTIONS=()
 
 # Follow all sym links to get the program name.
@@ -87,6 +88,7 @@
                            report upon completion.
   --profile                Run with profiling, then run using profile data.
   --verbose                Run script verbosely.
+  --no-clean               Don't clean up oat directories.
 
 The ART_OPTIONS are passed directly to the Android Runtime.
 
@@ -176,12 +178,14 @@
 # Input: Command line arguments to the art script.
 # e.g. -cp foo/classes.dex:bar/classes.dex would delete (foo/oat bar/oat) directories.
 cleanup_oat_directory_for_classpath() {
-  # First try: Use $CLASSPATH environment variable.
-  parse_classpath "$CLASSPATH"
-  # Second try: Look for latest -cp or -classpath arg which will take precedence.
-  find_cp_in_args "$@"
+  if [ "$CLEAN_OAT_FILES" = "yes" ]; then
+    # First try: Use $CLASSPATH environment variable.
+    parse_classpath "$CLASSPATH"
+    # Second try: Look for latest -cp or -classpath arg which will take precedence.
+    find_cp_in_args "$@"
 
-  cleanup_oat_directory "${PARSE_CLASSPATH_RESULT[@]}"
+    cleanup_oat_directory "${PARSE_CLASSPATH_RESULT[@]}"
+  fi
 }
 
 # Attempt to find $ANDROID_ROOT/framework/<isa>/core.art' without knowing what <isa> is.
@@ -291,6 +295,9 @@
   --verbose)
     VERBOSE="yes"
     ;;
+  --no-clean)
+    CLEAN_OAT_FILES="no"
+    ;;
   --*)
     echo "unknown option: $1" 1>&2
     usage
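
With the new flag, an invocation that preserves previously generated oat files might look like the following (illustrative; the classpath and main class are placeholders):

  art --no-clean -cp /data/local/tmp/classes.dex Main
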
diff --git a/tools/libcore_gcstress_debug_failures.txt b/tools/libcore_gcstress_debug_failures.txt
index d4a0423..5806b61 100644
--- a/tools/libcore_gcstress_debug_failures.txt
+++ b/tools/libcore_gcstress_debug_failures.txt
@@ -8,7 +8,8 @@
   description: "Timeouts on target with gcstress and debug.",
   result: EXEC_FAILED,
   modes: [device],
-  names: ["libcore.icu.TransliteratorTest#testAll",
+  names: ["jsr166.CompletableFutureTest#testCompleteOnTimeout_completed",
+          "libcore.icu.TransliteratorTest#testAll",
           "libcore.icu.RelativeDateTimeFormatterTest#test_bug25821045",
           "libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndTimeout",
           "libcore.java.lang.ref.ReferenceQueueTest#testRemoveWithDelayedResultAndNoTimeout",
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index d2322bb..355646b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -122,6 +122,9 @@
   fi
 done
 
+# Make sure the debuggee doesn't clean up what the debugger has generated.
+art_debugee="$art_debugee --no-clean"
+
 # For the host:
 #
 # If, on the other hand, there is a variant set, use it to modify the art_debugee parameter to