Merge "MIPS64: Improve string and class loads."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index d59d8f6..bd7f900 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -26,6 +26,8 @@
   AbstractMethod \
   AllFields \
   DexToDexDecompiler \
+  ErroneousA \
+  ErroneousB \
   ExceptionHandle \
   GetMethodSignature \
   ImageLayoutA \
@@ -85,7 +87,7 @@
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
 ART_GTEST_atomic_method_ref_map_test_DEX_DEPS := Interfaces
-ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
+ART_GTEST_class_linker_test_DEX_DEPS := ErroneousA ErroneousB Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_class_table_test_DEX_DEPS := XandY
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 2eb6fba..46f3358 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -49,6 +49,7 @@
         "optimizing/block_builder.cc",
         "optimizing/bounds_check_elimination.cc",
         "optimizing/builder.cc",
+        "optimizing/cha_guard_optimization.cc",
         "optimizing/code_generator.cc",
         "optimizing/code_generator_utils.cc",
         "optimizing/constant_folding.cc",
diff --git a/compiler/compiled_class.h b/compiler/compiled_class.h
index b88d613..06ce946 100644
--- a/compiler/compiled_class.h
+++ b/compiler/compiled_class.h
@@ -28,8 +28,11 @@
   mirror::Class::Status GetStatus() const {
     return status_;
   }
+  void SetStatus(mirror::Class::Status status) {
+    status_ = status;
+  }
  private:
-  const mirror::Class::Status status_;
+  mirror::Class::Status status_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc
index 051125e..bfd485d 100644
--- a/compiler/dex/dex_to_dex_decompiler.cc
+++ b/compiler/dex/dex_to_dex_decompiler.cc
@@ -27,10 +27,13 @@
 
 class DexDecompiler {
  public:
-  DexDecompiler(const DexFile::CodeItem& code_item, const ArrayRef<const uint8_t>& quickened_info)
+  DexDecompiler(const DexFile::CodeItem& code_item,
+                const ArrayRef<const uint8_t>& quickened_info,
+                bool decompile_return_instruction)
     : code_item_(code_item),
       quickened_info_ptr_(quickened_info.data()),
-      quickened_info_end_(quickened_info.data() + quickened_info.size()) {}
+      quickened_info_end_(quickened_info.data() + quickened_info.size()),
+      decompile_return_instruction_(decompile_return_instruction) {}
 
   bool Decompile();
 
@@ -87,6 +90,7 @@
   const DexFile::CodeItem& code_item_;
   const uint8_t* quickened_info_ptr_;
   const uint8_t* const quickened_info_end_;
+  const bool decompile_return_instruction_;
 
   DISALLOW_COPY_AND_ASSIGN(DexDecompiler);
 };
@@ -102,7 +106,9 @@
 
     switch (inst->Opcode()) {
       case Instruction::RETURN_VOID_NO_BARRIER:
-        inst->SetOpcode(Instruction::RETURN_VOID);
+        if (decompile_return_instruction_) {
+          inst->SetOpcode(Instruction::RETURN_VOID);
+        }
         break;
 
       case Instruction::NOP:
@@ -189,8 +195,12 @@
 }
 
 bool ArtDecompileDEX(const DexFile::CodeItem& code_item,
-                     const ArrayRef<const uint8_t>& quickened_info) {
-  DexDecompiler decompiler(code_item, quickened_info);
+                     const ArrayRef<const uint8_t>& quickened_info,
+                     bool decompile_return_instruction) {
+  if (quickened_info.size() == 0 && !decompile_return_instruction) {
+    return true;
+  }
+  DexDecompiler decompiler(code_item, quickened_info, decompile_return_instruction);
   return decompiler.Decompile();
 }
 
diff --git a/compiler/dex/dex_to_dex_decompiler.h b/compiler/dex/dex_to_dex_decompiler.h
index 5502ca2..b5d5b91 100644
--- a/compiler/dex/dex_to_dex_decompiler.h
+++ b/compiler/dex/dex_to_dex_decompiler.h
@@ -30,7 +30,8 @@
 // consistent with DexToDexCompiler, but we should really change it to
 // DexFile::CodeItem*.
 bool ArtDecompileDEX(const DexFile::CodeItem& code_item,
-                     const ArrayRef<const uint8_t>& quickened_data);
+                     const ArrayRef<const uint8_t>& quickened_data,
+                     bool decompile_return_instruction);
 
 }  // namespace optimizer
 }  // namespace art
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index ea6c7a2..9a8d27c 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -102,7 +102,8 @@
         if (compiled_method != nullptr) {
           table = compiled_method->GetVmapTable();
         }
-        optimizer::ArtDecompileDEX(*it.GetMethodCodeItem(), table);
+        optimizer::ArtDecompileDEX(
+            *it.GetMethodCodeItem(), table, /* decompile_return_instruction */ true);
         it.Next();
       }
       while (it.HasNextVirtualMethod()) {
@@ -113,7 +114,8 @@
         if (compiled_method != nullptr) {
           table = compiled_method->GetVmapTable();
         }
-        optimizer::ArtDecompileDEX(*it.GetMethodCodeItem(), table);
+        optimizer::ArtDecompileDEX(
+            *it.GetMethodCodeItem(), table, /* decompile_return_instruction */ true);
         it.Next();
       }
       DCHECK(!it.HasNext());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1b1de78..ec1642e 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -423,7 +423,7 @@
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
-  if (!GetCompilerOptions().VerifyAtRuntime()) {
+  if (!GetCompilerOptions().VerifyAtRuntime() && !GetCompilerOptions().VerifyOnlyProfile()) {
     Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
@@ -435,7 +435,8 @@
 
 // In-place unquicken the given `dex_files` based on `quickening_info`.
 static void Unquicken(const std::vector<const DexFile*>& dex_files,
-                      const ArrayRef<const uint8_t>& quickening_info) {
+                      const ArrayRef<const uint8_t>& quickening_info,
+                      bool decompile_return_instruction) {
   const uint8_t* quickening_info_ptr = quickening_info.data();
   const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size();
   for (const DexFile* dex_file : dex_files) {
@@ -454,14 +455,14 @@
         it.Next();
       }
 
-      // Unquicken each method.
       while (it.HasNextDirectMethod()) {
         const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
         if (code_item != nullptr) {
           uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
           quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(
-              *code_item, ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size));
+          optimizer::ArtDecompileDEX(*code_item,
+                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
+                                     decompile_return_instruction);
           quickening_info_ptr += quickening_size;
         }
         it.Next();
@@ -472,8 +473,9 @@
         if (code_item != nullptr) {
           uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
           quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(
-              *code_item, ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size));
+          optimizer::ArtDecompileDEX(*code_item,
+                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
+                                     decompile_return_instruction);
           quickening_info_ptr += quickening_size;
         }
         it.Next();
@@ -492,7 +494,11 @@
     // TODO: we unquicken unconditionnally, as we don't know
     // if the boot image has changed. How exactly we'll know is under
     // experimentation.
-    Unquicken(dex_files, vdex_file->GetQuickeningInfo());
+    TimingLogger::ScopedTiming t("Unquicken", timings);
+    // We do not decompile a RETURN_VOID_NO_BARRIER into a RETURN_VOID, as the quickening
+    // optimization does not depend on the boot image (the optimization relies on not
+    // having final fields in a class, which does not change for an app).
+    Unquicken(dex_files, vdex_file->GetQuickeningInfo(), /* decompile_return_instruction */ false);
     Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
         new verifier::VerifierDeps(dex_files, vdex_file->GetVerifierDepsData()));
   }
@@ -983,8 +989,10 @@
                << "situations. Please check the log.";
   }
 
-  InitializeClasses(class_loader, dex_files, timings);
-  VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
+  if (!verify_only_profile) {
+    InitializeClasses(class_loader, dex_files, timings);
+    VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
+  }
 
   UpdateImageClasses(timings);
   VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false);
@@ -2060,21 +2068,32 @@
         std::set<dex::TypeIndex> set(unverified_classes.begin(), unverified_classes.end());
         for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
           const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-          const char* descriptor = dex_file->GetClassDescriptor(class_def);
-          cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader));
-          if (cls.Get() == nullptr) {
-            CHECK(soa.Self()->IsExceptionPending());
-            soa.Self()->ClearException();
-          } else if (set.find(class_def.class_idx_) == set.end()) {
-            ObjectLock<mirror::Class> lock(soa.Self(), cls);
-            mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
-            // Create `VerifiedMethod`s for each methods, the compiler expects one for
-            // quickening or compiling.
-            // Note that this means:
-            // - We're only going to compile methods that did verify.
-            // - Quickening will not do checkcast ellision.
-            // TODO(ngeoffray): Reconsider this once we refactor compiler filters.
-            PopulateVerifiedMethods(*dex_file, i, verification_results_);
+          if (set.find(class_def.class_idx_) == set.end()) {
+            if (GetCompilerOptions().VerifyOnlyProfile()) {
+              // Just update the compiled_classes_ map. The compiler doesn't need to resolve
+              // the type.
+              compiled_classes_.Overwrite(
+                  ClassReference(dex_file, i), new CompiledClass(mirror::Class::kStatusVerified));
+            } else {
+              // Resolve the type, so later compilation stages know they don't need to verify
+              // the class.
+              const char* descriptor = dex_file->GetClassDescriptor(class_def);
+              cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader));
+              if (cls.Get() != nullptr) {
+                ObjectLock<mirror::Class> lock(soa.Self(), cls);
+                mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
+              } else {
+                DCHECK(soa.Self()->IsExceptionPending());
+                soa.Self()->ClearException();
+              }
+              // Create `VerifiedMethod`s for each method, the compiler expects one for
+              // quickening or compiling.
+              // Note that this means:
+              // - We're only going to compile methods that did verify.
+              // - Quickening will not do checkcast elision.
+              // TODO(ngeoffray): Reconsider this once we refactor compiler filters.
+              PopulateVerifiedMethods(*dex_file, i, verification_results_);
+            }
           }
         }
       }
@@ -2675,29 +2694,29 @@
 }
 
 void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status status) {
+  switch (status) {
+    case mirror::Class::kStatusNotReady:
+    case mirror::Class::kStatusError:
+    case mirror::Class::kStatusRetryVerificationAtRuntime:
+    case mirror::Class::kStatusVerified:
+    case mirror::Class::kStatusInitialized:
+    case mirror::Class::kStatusResolved:
+      break;  // Expected states.
+    default:
+      LOG(FATAL) << "Unexpected class status for class "
+          << PrettyDescriptor(ref.first->GetClassDescriptor(ref.first->GetClassDef(ref.second)))
+          << " of " << status;
+  }
+
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
   auto it = compiled_classes_.find(ref);
-  if (it == compiled_classes_.end() || it->second->GetStatus() != status) {
-    // An entry doesn't exist or the status is lower than the new status.
-    if (it != compiled_classes_.end()) {
-      CHECK_GT(status, it->second->GetStatus());
-      delete it->second;
-    }
-    switch (status) {
-      case mirror::Class::kStatusNotReady:
-      case mirror::Class::kStatusError:
-      case mirror::Class::kStatusRetryVerificationAtRuntime:
-      case mirror::Class::kStatusVerified:
-      case mirror::Class::kStatusInitialized:
-      case mirror::Class::kStatusResolved:
-        break;  // Expected states.
-      default:
-        LOG(FATAL) << "Unexpected class status for class "
-            << PrettyDescriptor(ref.first->GetClassDescriptor(ref.first->GetClassDef(ref.second)))
-            << " of " << status;
-    }
+  if (it == compiled_classes_.end()) {
     CompiledClass* compiled_class = new CompiledClass(status);
     compiled_classes_.Overwrite(ref, compiled_class);
+  } else if (status > it->second->GetStatus()) {
+    // Update the status if we now have a greater one. This happens with vdex,
+    // which records that a class is verified, but does not resolve it.
+    it->second->SetStatus(status);
   }
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index cc50197..eb69931 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -440,9 +440,12 @@
                       TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
+  // NO_THREAD_SAFETY_ANALYSIS as the method accesses a guarded value in a
+  // single-threaded way.
   void Verify(jobject class_loader,
               const std::vector<const DexFile*>& dex_files,
-              TimingLogger* timings);
+              TimingLogger* timings)
+    NO_THREAD_SAFETY_ANALYSIS;
 
   void VerifyDexFile(jobject class_loader,
                      const DexFile& dex_file,
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
new file mode 100644
index 0000000..fe42301
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha_guard_optimization.h"
+
+namespace art {
+
+// Note we can only do CHA guard elimination/motion in a single pass, since
+// if a guard is not removed, another guard might be removed due to
+// the existence of the first guard. The first guard should not be further
+// removed in another pass. For example, due to further optimizations,
+// a receiver of a guard might turn out to be a parameter value, or defined at
+// a different site, which makes the guard removable as a result. However
+// it's not safe to remove the guard in another pass since another guard might
+// have been removed due to the existence of this guard.
+//
+// As a consequence, we decided not to rely on other passes to remove them
+// (such as GVN or instruction simplifier).
+
+class CHAGuardVisitor : HGraphVisitor {
+ public:
+  explicit CHAGuardVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        block_has_cha_guard_(GetGraph()->GetBlocks().size(),
+                             0,
+                             graph->GetArena()->Adapter(kArenaAllocCHA)) {
+    number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
+    DCHECK_NE(number_of_guards_to_visit_, 0u);
+    // Will recount number of guards during guard optimization.
+    GetGraph()->SetNumberOfCHAGuards(0);
+  }
+
+  void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE;
+
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+ private:
+  void RemoveGuard(HShouldDeoptimizeFlag* flag);
+  // Return true if `flag` is removed.
+  bool OptimizeForParameter(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+  // Return true if `flag` is removed.
+  bool OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+  // Return true if `flag` is hoisted.
+  bool HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+
+  // Record if each block has any CHA guard. It's updated during the
+  // reverse post order visit. Use int instead of bool since ArenaVector
+  // does not support bool.
+  ArenaVector<int> block_has_cha_guard_;
+
+  // The iterator that's being used for this visitor. Need it to manually
+  // advance the iterator due to removing/moving more than one instruction.
+  HInstructionIterator* instruction_iterator_;
+
+  // Used to short-circuit the pass when there are no more guards left to visit.
+  uint32_t number_of_guards_to_visit_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHAGuardVisitor);
+};
+
+void CHAGuardVisitor::VisitBasicBlock(HBasicBlock* block) {
+  if (number_of_guards_to_visit_ == 0) {
+    return;
+  }
+  // Skip phis, just iterate through instructions.
+  HInstructionIterator it(block->GetInstructions());
+  instruction_iterator_ = &it;
+  for (; !it.Done(); it.Advance()) {
+    DCHECK(it.Current()->IsInBlock());
+    it.Current()->Accept(this);
+  }
+}
+
+void CHAGuardVisitor::RemoveGuard(HShouldDeoptimizeFlag* flag) {
+  HBasicBlock* block = flag->GetBlock();
+  HInstruction* compare = flag->GetNext();
+  DCHECK(compare->IsNotEqual());
+  HInstruction* deopt = compare->GetNext();
+  DCHECK(deopt->IsDeoptimize());
+
+  // Advance instruction iterator first before we remove the guard.
+  // We need to do it twice since we remove three instructions and the
+  // visitor is responsible for advancing it once.
+  instruction_iterator_->Advance();
+  instruction_iterator_->Advance();
+  block->RemoveInstruction(deopt);
+  block->RemoveInstruction(compare);
+  block->RemoveInstruction(flag);
+}
+
+bool CHAGuardVisitor::OptimizeForParameter(HShouldDeoptimizeFlag* flag,
+                                           HInstruction* receiver) {
+  // If some compiled code is invalidated by CHA due to class loading, the
+  // compiled code will not be entered anymore. So the very fact that the
+  // compiled code is invoked guarantees that a parameter receiver conforms
+  // to all the CHA devirtualization assumptions made by the compiled code,
+  // since all parameter receivers pre-exist any (potential) invalidation of
+  // the compiled code.
+  //
+  // TODO: allow more cases such as a phi whose inputs are all parameters.
+  if (receiver->IsParameterValue()) {
+    RemoveGuard(flag);
+    return true;
+  }
+  return false;
+}
+
+bool CHAGuardVisitor::OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag,
+                                                  HInstruction* receiver) {
+  // If there is another guard that dominates the current guard, and
+  // that guard is dominated by receiver's definition, then the current
+  // guard can be eliminated, since receiver must pre-exist that other
+  // guard, and passing that guard guarantees that receiver conforms to
+  // all the CHA devirtualization assumptions.
+  HBasicBlock* dominator = flag->GetBlock();
+  HBasicBlock* receiver_def_block = receiver->GetBlock();
+
+  // Complexity of the following algorithm:
+  // We potentially need to traverse the full dominator chain to receiver_def_block,
+  // plus a (partial) linear search within one block for each guard.
+  // So the worst case for each guard is bounded by the size of the
+  // biggest block plus the depth of the dominating tree.
+
+  while (dominator != receiver_def_block) {
+    if (block_has_cha_guard_[dominator->GetBlockId()] == 1) {
+      RemoveGuard(flag);
+      return true;
+    }
+    dominator = dominator->GetDominator();
+  }
+
+  // At this point dominator is the block where receiver is defined.
+  // We do a linear search within dominator to see if there is a guard after
+  // receiver's definition.
+  HInstruction* instruction;
+  if (dominator == flag->GetBlock()) {
+    // Flag and receiver are defined in the same block. Search backward from
+    // the current guard.
+    instruction = flag->GetPrevious();
+  } else {
+    // Search backward from the last instruction of that dominator.
+    instruction = dominator->GetLastInstruction();
+  }
+  while (instruction != receiver) {
+    if (instruction == nullptr) {
+      // receiver must be defined in this block, we didn't find it
+      // in the instruction list, so it must be a Phi.
+      DCHECK(receiver->IsPhi());
+      break;
+    }
+    if (instruction->IsShouldDeoptimizeFlag()) {
+      RemoveGuard(flag);
+      return true;
+    }
+    instruction = instruction->GetPrevious();
+  }
+  return false;
+}
+
+bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
+                                 HInstruction* receiver) {
+  // If receiver is loop invariant, we can hoist the guard out of the
+  // loop since passing a guard before entering the loop guarantees that
+  // receiver conforms to all the CHA devirtualization assumptions.
+  // We only hoist guards out of the inner loop since that offers most of the
+  // benefit and it might help remove other guards in the inner loop.
+  HBasicBlock* block = flag->GetBlock();
+  HLoopInformation* loop_info = block->GetLoopInformation();
+  if (loop_info != nullptr &&
+      !loop_info->IsIrreducible() &&
+      loop_info->IsDefinedOutOfTheLoop(receiver)) {
+    HInstruction* compare = flag->GetNext();
+    DCHECK(compare->IsNotEqual());
+    HInstruction* deopt = compare->GetNext();
+    DCHECK(deopt->IsDeoptimize());
+
+    // Advance instruction iterator first before we move the guard.
+    // We need to do it twice since we move three instructions and the
+    // visitor is responsible for advancing it once.
+    instruction_iterator_->Advance();
+    instruction_iterator_->Advance();
+
+    HBasicBlock* pre_header = loop_info->GetPreHeader();
+    flag->MoveBefore(pre_header->GetLastInstruction());
+    compare->MoveBefore(pre_header->GetLastInstruction());
+
+    block->RemoveInstruction(deopt);
+    HInstruction* suspend = loop_info->GetSuspendCheck();
+    // Need a new deoptimize instruction that copies the environment
+    // of the suspend instruction for the loop.
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend->GetEnvironment(), loop_info->GetHeader());
+    block_has_cha_guard_[pre_header->GetBlockId()] = 1;
+    GetGraph()->IncrementNumberOfCHAGuards();
+    return true;
+  }
+  return false;
+}
+
+void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  number_of_guards_to_visit_--;
+  HInstruction* receiver = flag->InputAt(0);
+  // Don't need the receiver anymore.
+  flag->RemoveInputAt(0);
+  if (receiver->IsNullCheck()) {
+    receiver = receiver->InputAt(0);
+  }
+
+  if (OptimizeForParameter(flag, receiver)) {
+    DCHECK(!flag->IsInBlock());
+    return;
+  }
+  if (OptimizeWithDominatingGuard(flag, receiver)) {
+    DCHECK(!flag->IsInBlock());
+    return;
+  }
+  if (HoistGuard(flag, receiver)) {
+    DCHECK(flag->IsInBlock());
+    return;
+  }
+
+  // Need to keep the CHA guard in place.
+  block_has_cha_guard_[flag->GetBlock()->GetBlockId()] = 1;
+  GetGraph()->IncrementNumberOfCHAGuards();
+}
+
+void CHAGuardOptimization::Run() {
+  if (graph_->GetNumberOfCHAGuards() == 0) {
+    return;
+  }
+  CHAGuardVisitor visitor(graph_);
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    visitor.VisitBasicBlock(block);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
new file mode 100644
index 0000000..ba0cdb8
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimize CHA guards by removing/moving them.
+ */
+class CHAGuardOptimization : public HOptimization {
+ public:
+  explicit CHAGuardOptimization(HGraph* graph)
+      : HOptimization(graph, kCHAGuardOptimizationPassName) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CHAGuardOptimization);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 55f3c3c..1df00ff 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -46,8 +46,10 @@
 using helpers::InputRegister;
 using helpers::InputRegisterAt;
 using helpers::InputSRegisterAt;
+using helpers::InputVRegister;
 using helpers::InputVRegisterAt;
 using helpers::Int32ConstantFrom;
+using helpers::Int64ConstantFrom;
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
@@ -56,6 +58,7 @@
 using helpers::OutputVRegister;
 using helpers::RegisterFrom;
 using helpers::SRegisterFrom;
+using helpers::Uint64ConstantFrom;
 
 using vixl::ExactAssemblyScope;
 using vixl::CodeBufferCheckScope;
@@ -1240,10 +1243,26 @@
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena()),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_string_patches_(StringReferenceValueComparator(),
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_class_patches_(TypeReferenceValueComparator(),
+                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
   // Give d14 and d15 as scratch registers to VIXL.
@@ -1378,7 +1397,7 @@
   if (!skip_overflow_check) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
     vixl32::Register temp = temps.Acquire();
-    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+    __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(kArm)));
     // The load must immediately precede RecordPcInfo.
     ExactAssemblyScope aas(GetVIXLAssembler(),
                            vixl32::kMaxInstructionSizeInBytes,
@@ -1795,7 +1814,7 @@
       break;
   }
   if (right.IsConstant()) {
-    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+    int64_t value = Int64ConstantFrom(right);
     int32_t val_low = Low32Bits(value);
     int32_t val_high = High32Bits(value);
 
@@ -1880,7 +1899,7 @@
         __ B(true_target);
       }
     } else {
-      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+      DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
       if (false_target != nullptr) {
         __ B(false_target);
       }
@@ -2482,9 +2501,7 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      // TODO(VIXL): Consider introducing an InputVRegister()
-      // helper function (equivalent to InputRegister()).
-      __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0));
+      __ Vneg(OutputVRegister(neg), InputVRegister(neg));
       break;
 
     default:
@@ -2774,8 +2791,8 @@
           } else {
             DCHECK(in.IsConstant());
             DCHECK(in.GetConstant()->IsLongConstant());
-            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
+            int32_t value = Int32ConstantFrom(in);
+            __ Mov(OutputRegister(conversion), value);
           }
           break;
 
@@ -3114,8 +3131,8 @@
       // Extra checks to protect caused by the existence of R1_R2.
       // The algorithm is wrong if out.hi is either in1.lo or in2.lo:
       // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2);
-      DCHECK_NE(out_hi.GetCode(), in1_lo.GetCode());
-      DCHECK_NE(out_hi.GetCode(), in2_lo.GetCode());
+      DCHECK(!out_hi.Is(in1_lo));
+      DCHECK(!out_hi.Is(in2_lo));
 
       // input: in1 - 64 bits, in2 - 64 bits
       // output: out
@@ -3155,7 +3172,7 @@
 
   vixl32::Register out = OutputRegister(instruction);
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
-  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  int32_t imm = Int32ConstantFrom(second);
   DCHECK(imm == 1 || imm == -1);
 
   if (instruction->IsRem()) {
@@ -3180,7 +3197,7 @@
   vixl32::Register out = OutputRegister(instruction);
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
-  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  int32_t imm = Int32ConstantFrom(second);
   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
@@ -3253,7 +3270,7 @@
   Location second = instruction->GetLocations()->InAt(1);
   DCHECK(second.IsConstant());
 
-  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  int32_t imm = Int32ConstantFrom(second);
   if (imm == 0) {
     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   } else if (imm == 1 || imm == -1) {
@@ -3287,7 +3304,7 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+        int32_t value = Int32ConstantFrom(div->InputAt(1));
         if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
@@ -3400,7 +3417,7 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        int32_t value = Int32ConstantFrom(rem->InputAt(1));
         if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
@@ -3535,7 +3552,7 @@
         __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
       } else {
         DCHECK(value.IsConstant()) << value;
-        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+        if (Int32ConstantFrom(value) == 0) {
           __ B(slow_path->GetEntryLabel());
         }
       }
@@ -3549,7 +3566,7 @@
         __ B(eq, slow_path->GetEntryLabel());
       } else {
         DCHECK(value.IsConstant()) << value;
-        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+        if (Int64ConstantFrom(value) == 0) {
           __ B(slow_path->GetEntryLabel());
         }
       }
@@ -3759,7 +3776,7 @@
           __ Lsr(out_reg, first_reg, out_reg);
         }
       } else {
-        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        int32_t cst = Int32ConstantFrom(second);
         uint32_t shift_value = cst & kMaxIntShiftDistance;
         if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
           __ Mov(out_reg, first_reg);
@@ -3844,7 +3861,7 @@
         // Register allocator doesn't create partial overlap.
         DCHECK(!o_l.Is(high));
         DCHECK(!o_h.Is(low));
-        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        int32_t cst = Int32ConstantFrom(second);
         uint32_t shift_value = cst & kMaxLongShiftDistance;
         if (shift_value > 32) {
           if (op->IsShl()) {
@@ -4414,7 +4431,7 @@
     locations->AddTemp(Location::RequiresRegister());
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+    // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4876,7 +4893,7 @@
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
   // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+  // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
   // Also need for String compression feature.
   if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
       || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
@@ -4911,7 +4928,7 @@
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
       if (index.IsConstant()) {
-        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        int32_t const_index = Int32ConstantFrom(index);
         if (maybe_compressed_char_at) {
           vixl32::Label uncompressed_load, done;
           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
@@ -4945,7 +4962,7 @@
           // `TryExtractArrayAccessAddress()`.
           if (kIsDebugBuild) {
             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
-            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
           }
           temp = obj;
         } else {
@@ -4990,7 +5007,7 @@
         vixl32::Register out = OutputRegister(instruction);
         if (index.IsConstant()) {
           size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+              (Int32ConstantFrom(index) << TIMES_4) + data_offset;
           GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
           // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method,
           // we should use a scope and the assembler to emit the load instruction to guarantee that
@@ -5012,7 +5029,7 @@
             // `TryExtractArrayAccessAddress()`.
             if (kIsDebugBuild) {
               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
-              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+              DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
             }
             temp = obj;
           } else {
@@ -5037,7 +5054,7 @@
     case Primitive::kPrimLong: {
       if (index.IsConstant()) {
         size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+            (Int32ConstantFrom(index) << TIMES_8) + data_offset;
         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5051,7 +5068,7 @@
     case Primitive::kPrimFloat: {
       vixl32::SRegister out = SRegisterFrom(out_loc);
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
         GetAssembler()->LoadSFromOffset(out, obj, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5064,7 +5081,7 @@
 
     case Primitive::kPrimDouble: {
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5138,7 +5155,7 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
       if (index.IsConstant()) {
-        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        int32_t const_index = Int32ConstantFrom(index);
         uint32_t full_offset =
             data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
         StoreOperandType store_type = GetStoreOperandType(value_type);
@@ -5153,7 +5170,7 @@
           // `TryExtractArrayAccessAddress()`.
           if (kIsDebugBuild) {
             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
-            DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+            DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
           }
           temp = array;
         } else {
@@ -5174,7 +5191,7 @@
         // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+              (Int32ConstantFrom(index) << TIMES_4) + data_offset;
           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
@@ -5210,7 +5227,7 @@
           __ CompareAndBranchIfNonZero(value, &non_zero);
           if (index.IsConstant()) {
             size_t offset =
-               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+               (Int32ConstantFrom(index) << TIMES_4) + data_offset;
             GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
           } else {
             DCHECK(index.IsRegister()) << index;
@@ -5284,7 +5301,7 @@
 
       if (index.IsConstant()) {
         size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+            (Int32ConstantFrom(index) << TIMES_4) + data_offset;
         GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
       } else {
         DCHECK(index.IsRegister()) << index;
@@ -5321,7 +5338,7 @@
       Location value = locations->InAt(2);
       if (index.IsConstant()) {
         size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+            (Int32ConstantFrom(index) << TIMES_8) + data_offset;
         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5336,7 +5353,7 @@
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
         GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5351,7 +5368,7 @@
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
         GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
       } else {
         UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -5416,7 +5433,7 @@
   if (second.IsRegister()) {
     __ Add(out, first, RegisterFrom(second));
   } else {
-    __ Add(out, first, second.GetConstant()->AsIntConstant()->GetValue());
+    __ Add(out, first, Int32ConstantFrom(second));
   }
 }
 
@@ -5612,7 +5629,7 @@
         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
       }
     } else if (constant->IsLongConstant()) {
-      int64_t value = constant->AsLongConstant()->GetValue();
+      int64_t value = Int64ConstantFrom(source);
       if (destination.IsRegisterPair()) {
         __ Mov(LowRegisterFrom(destination), Low32Bits(value));
         __ Mov(HighRegisterFrom(destination), High32Bits(value));
@@ -5771,17 +5788,15 @@
     case HLoadClass::LoadKind::kReferrersClass:
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      // TODO(VIXL): Enable it back when literal pools are fixed in VIXL.
-      return HLoadClass::LoadKind::kDexCacheViaMethod;
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
       DCHECK(GetCompilerOptions().GetCompilePic());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-      // TODO(VIXL): Enable it back when literal pools are fixed in VIXL.
-      return HLoadClass::LoadKind::kDexCacheViaMethod;
+      break;
     case HLoadClass::LoadKind::kJitTableAddress:
-      // TODO(VIXL): Enable it back when literal pools are fixed in VIXL.
-      return HLoadClass::LoadKind::kDexCacheViaMethod;
+      break;
     case HLoadClass::LoadKind::kDexCachePcRelative:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       // We disable pc-relative load when there is an irreducible loop, as the optimization
@@ -5857,7 +5872,9 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      TODO_VIXL32(FATAL);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                            cls->GetTypeIndex()));
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -5868,11 +5885,18 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      TODO_VIXL32(FATAL);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
     case HLoadClass::LoadKind::kJitTableAddress: {
-      TODO_VIXL32(FATAL);
+      __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
+                                                       cls->GetTypeIndex(),
+                                                       cls->GetAddress()));
+      // /* GcRoot<mirror::Class> */ out = *out
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
@@ -5957,21 +5981,19 @@
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      // TODO(VIXL): Implement missing optimization.
-      return HLoadString::LoadKind::kDexCacheViaMethod;
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
       DCHECK(GetCompilerOptions().GetCompilePic());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-      // TODO(VIXL): Implement missing optimization.
-      return HLoadString::LoadKind::kDexCacheViaMethod;
+      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
-      // TODO(VIXL): Implement missing optimization.
-      return HLoadString::LoadKind::kDexCacheViaMethod;
+      break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6013,8 +6035,9 @@
 
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      TODO_VIXL32(FATAL);
-      break;
+      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                              load->GetStringIndex()));
+      return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
@@ -6024,8 +6047,10 @@
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
-      TODO_VIXL32(FATAL);
-      break;
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
@@ -6042,8 +6067,11 @@
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
-      TODO_VIXL32(FATAL);
-      break;
+      __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
+                                                        load->GetStringIndex()));
+      // /* GcRoot<mirror::String> */ out = *out
+      GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+      return;
     }
     default:
       break;
@@ -7206,19 +7234,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
     HInvokeStaticOrDirect* invoke) {
-  // TODO(VIXL): Implement optimized code paths.
-  if (desired_dispatch_info.method_load_kind ==
-          HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup ||
-      desired_dispatch_info.code_ptr_location ==
-          HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup) {
-    return {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
-
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
   // We disable pc-relative load when there is an irreducible loop, as the optimization
   // is incompatible with it.
@@ -7283,7 +7298,7 @@
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
       // LR = code address from literal pool with link-time patch.
-      TODO_VIXL32(FATAL);
+      __ Ldr(lr, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
@@ -7309,7 +7324,7 @@
       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      TODO_VIXL32(FATAL);
+      __ Ldr(RegisterFrom(temp), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       HArmDexCacheArraysBase* base =
@@ -7463,6 +7478,57 @@
   return &patches->back();
 }
 
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
+    const DexFile& dex_file,
+    dex::StringIndex string_index) {
+  return boot_image_string_patches_.GetOrCreate(  // Keyed by (dex file, string index).
+      StringReference(&dex_file, string_index),
+      [this]() {  // Builds the literal; EmitLinkerPatches() records a StringPatch for it.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+      });
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral(
+    const DexFile& dex_file,
+    dex::TypeIndex type_index) {
+  return boot_image_type_patches_.GetOrCreate(  // Keyed by (dex file, type index).
+      TypeReference(&dex_file, type_index),
+      [this]() {  // Builds the literal; EmitLinkerPatches() records a TypePatch for it.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+      });
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(address, map);  // Already uint32_t: no checked cast needed.
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
+  return DeduplicateUint32Literal(address, &uint32_literals_);  // Always the plain literal map.
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(const DexFile& dex_file,
+                                                       dex::StringIndex string_index) {
+  jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
+  return jit_string_patches_.GetOrCreate(  // Literal is written by EmitJitRootPatches().
+      StringReference(&dex_file, string_index),
+      [this]() {  // Builds the literal with a zero placeholder.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+      });
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
+                                                      dex::TypeIndex type_index,
+                                                      uint64_t address) {
+  jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), address);
+  return jit_class_patches_.GetOrCreate(  // Literal is written by EmitJitRootPatches().
+      TypeReference(&dex_file, type_index),
+      [this]() {  // Builds the literal with a zero placeholder.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+      });
+}
+
 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
     const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -7486,11 +7552,34 @@
 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
       relative_call_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
+      boot_image_string_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size();
+      boot_image_type_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    VIXLUInt32Literal* literal = entry.second;
+    DCHECK(literal->IsBound());
+    uint32_t literal_offset = literal->GetLocation();
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    VIXLUInt32Literal* literal = entry.second;
+    DCHECK(literal->IsBound());
+    uint32_t literal_offset = literal->GetLocation();
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
   for (const PatchInfo<vixl32::Label>& info : relative_call_patches_) {
     uint32_t literal_offset = info.label.GetLocation();
     linker_patches->push_back(
@@ -7498,6 +7587,15 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    VIXLUInt32Literal* literal = entry.second;
+    DCHECK(literal->IsBound());
+    uint32_t literal_offset = literal->GetLocation();
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index.index_));
+  }
   if (!GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
@@ -7505,8 +7603,54 @@
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    VIXLUInt32Literal* literal = entry.second;
+    DCHECK(literal->IsBound());
+    uint32_t literal_offset = literal->GetLocation();
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index.index_));
+  }
   EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                               linker_patches);
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    VIXLUInt32Literal* literal = entry.second;
+    DCHECK(literal->IsBound());
+    uint32_t literal_offset = literal->GetLocation();
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
+    uint32_t value,
+    Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(  // Keyed by the 32-bit value itself.
+      value,
+      [this, value]() {  // Builds a literal that already holds `value`.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value */ value);
+      });
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodLiteral(
+    MethodReference target_method,
+    MethodToLiteralMap* map) {
+  return map->GetOrCreate(  // Keyed by the target method reference.
+      target_method,
+      [this]() {  // Builds the literal with a zero placeholder.
+        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+      });
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodAddressLiteral(
+    MethodReference target_method) {  // Entries become LinkerPatch::MethodPatch records.
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodCodeLiteral(
+    MethodReference target_method) {  // Entries become LinkerPatch::CodePatch records.
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
@@ -7702,6 +7846,31 @@
   }
 }
 
+static void PatchJitRootUse(uint8_t* code,  // Stores a root-table slot address into a literal.
+                            const uint8_t* roots_data,
+                            VIXLUInt32Literal* literal,
+                            uint64_t index_in_table) {
+  DCHECK(literal->IsBound());
+  uint32_t literal_offset = literal->GetLocation();  // Used as a byte offset from `code`.
+  uintptr_t address =  // Address of slot `index_in_table`, counted in GcRoot-sized entries.
+      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+  uint8_t* data = code + literal_offset;  // The literal's 32-bit word in the code buffer.
+  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
+void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const auto& entry : jit_string_patches_) {  // entry: string reference -> literal.
+    const auto& it = jit_string_roots_.find(entry.first);  // Root entry for the same key.
+    DCHECK(it != jit_string_roots_.end());
+    PatchJitRootUse(code, roots_data, entry.second, it->second);  // it->second: table index.
+  }
+  for (const auto& entry : jit_class_patches_) {  // entry: type reference -> literal.
+    const auto& it = jit_class_roots_.find(entry.first);  // Root entry for the same key.
+    DCHECK(it != jit_class_roots_.end());
+    PatchJitRootUse(code, roots_data, entry.second, it->second);  // it->second: table index.
+  }
+}
+
 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
     vixl32::Register out) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 93ea601..45bd164 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -107,20 +107,20 @@
     arraysize(kRuntimeParameterFpuRegistersVIXL);
 
 class LoadClassSlowPathARMVIXL;
-
 class CodeGeneratorARMVIXL;
 
+using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>;
+using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
+
 class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
  public:
-  typedef vixl::aarch32::Literal<int32_t> IntLiteral;
-
   explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
       : switch_instr_(switch_instr),
         table_start_(),
         bb_addresses_(switch_instr->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
     uint32_t num_entries = switch_instr_->GetNumEntries();
     for (uint32_t i = 0; i < num_entries; i++) {
-      IntLiteral *lit = new IntLiteral(0, vixl32::RawLiteral::kManuallyPlaced);
+      VIXLInt32Literal *lit = new VIXLInt32Literal(0, vixl32::RawLiteral::kManuallyPlaced);
       bb_addresses_.emplace_back(lit);
     }
   }
@@ -133,7 +133,7 @@
  private:
   HPackedSwitch* const switch_instr_;
   vixl::aarch32::Label table_start_;
-  ArenaVector<std::unique_ptr<IntLiteral>> bb_addresses_;
+  ArenaVector<std::unique_ptr<VIXLInt32Literal>> bb_addresses_;
 
   DISALLOW_COPY_AND_ASSIGN(JumpTableARMVIXL);
 };
@@ -566,8 +566,22 @@
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
+  VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                       dex::StringIndex string_index);
+  VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                     dex::TypeIndex type_index);
+  VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+  VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
+  VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
+                                                 dex::StringIndex string_index);
+  VIXLUInt32Literal* DeduplicateJitClassLiteral(const DexFile& dex_file,
+                                                dex::TypeIndex type_index,
+                                                uint64_t address);
+
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -673,10 +687,21 @@
   vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 vixl::aarch32::Register temp);
 
-  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch32::Literal<uint32_t>*>;
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>;
   using MethodToLiteralMap =
-      ArenaSafeMap<MethodReference, vixl::aarch32::Literal<uint32_t>*, MethodReferenceComparator>;
+      ArenaSafeMap<MethodReference, VIXLUInt32Literal*, MethodReferenceComparator>;
+  using StringToLiteralMap = ArenaSafeMap<StringReference,
+                                          VIXLUInt32Literal*,
+                                          StringReferenceValueComparator>;
+  using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                        VIXLUInt32Literal*,
+                                        TypeReferenceValueComparator>;
 
+  VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
+  VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
+                                              MethodToLiteralMap* map);
+  VIXLUInt32Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  VIXLUInt32Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
                                           ArenaDeque<PcRelativePatchInfo>* patches);
@@ -697,15 +722,31 @@
   ArmVIXLAssembler assembler_;
   const ArmInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<PatchInfo<vixl::aarch32::Label>> relative_call_patches_;
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
+
+  // Patches for string literals in JIT compiled code.
+  StringToLiteralMap jit_string_patches_;
+  // Patches for class literals in JIT compiled code.
+  TypeToLiteralMap jit_class_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
 };
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index eabdbad..21c3ae6 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -122,10 +122,16 @@
   if (type == Primitive::kPrimFloat) {
     return InputSRegisterAt(instr, input_index);
   } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
     return InputDRegisterAt(instr, input_index);
   }
 }
 
+inline vixl::aarch32::VRegister InputVRegister(HInstruction* instr) {
+  DCHECK_EQ(instr->InputCount(), 1u);
+  return InputVRegisterAt(instr, 0);
+}
+
 inline vixl::aarch32::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
@@ -140,8 +146,7 @@
   return InputRegisterAt(instr, 0);
 }
 
-inline int32_t Int32ConstantFrom(Location location) {
-  HConstant* instr = location.GetConstant();
+inline int32_t Int32ConstantFrom(HInstruction* instr) {
   if (instr->IsIntConstant()) {
     return instr->AsIntConstant()->GetValue();
   } else if (instr->IsNullConstant()) {
@@ -155,6 +160,10 @@
   }
 }
 
+inline int32_t Int32ConstantFrom(Location location) {
+  return Int32ConstantFrom(location.GetConstant());
+}
+
 inline int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   if (instr->IsIntConstant()) {
@@ -167,6 +176,11 @@
   }
 }
 
+inline uint64_t Uint64ConstantFrom(HInstruction* instr) {
+  DCHECK(instr->IsConstant()) << instr->DebugName();
+  return instr->AsConstant()->GetValueAsUint64();
+}
+
 inline vixl::aarch32::Operand OperandFrom(Location location, Primitive::Type type) {
   if (location.IsRegister()) {
     return vixl::aarch32::Operand(RegisterFrom(location, type));
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index b21bc09..5456b1e 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -73,10 +73,18 @@
 }
 
 /**
- * Returns narrowest data type.
+ * Returns result of implicit widening type conversion done in HIR.
  */
-static Primitive::Type Narrowest(Primitive::Type type1, Primitive::Type type2) {
-  return Primitive::ComponentSize(type1) <= Primitive::ComponentSize(type2) ? type1 : type2;
+static Primitive::Type ImplicitConversion(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+      return Primitive::kPrimInt;
+    default:
+      return type;
+  }
 }
 
 //
@@ -232,9 +240,9 @@
   } else if (instruction->IsSelect()) {
     info = TransferPhi(loop, instruction, /*input_index*/ 0, /*adjust_input_size*/ 1);
   } else if (instruction->IsTypeConversion()) {
-    info = TransferCnv(LookupInfo(loop, instruction->InputAt(0)),
-                       instruction->AsTypeConversion()->GetInputType(),
-                       instruction->AsTypeConversion()->GetResultType());
+    info = TransferConversion(LookupInfo(loop, instruction->InputAt(0)),
+                              instruction->AsTypeConversion()->GetInputType(),
+                              instruction->AsTypeConversion()->GetResultType());
   } else if (instruction->IsBoundsCheck()) {
     info = LookupInfo(loop, instruction->InputAt(0));  // Pass-through.
   }
@@ -267,8 +275,12 @@
     return;
   }
 
-  // Store interesting cycle.
-  AssignCycle(phi->AsPhi());
+  // Store interesting cycle in each loop phi.
+  for (size_t i = 0; i < size; i++) {
+    if (scc_[i]->IsLoopHeaderPhi()) {
+      AssignCycle(scc_[i]->AsPhi());
+    }
+  }
 
   // Singleton is wrap-around induction if all internal links have the same meaning.
   if (size == 1) {
@@ -326,7 +338,7 @@
     } else if (instruction->IsSelect()) {
       update = SolvePhi(instruction, /*input_index*/ 0, /*adjust_input_size*/ 1);  // acts like Phi
     } else if (instruction->IsTypeConversion()) {
-      update = SolveCnv(instruction->AsTypeConversion());
+      update = SolveConversion(loop, phi, instruction->AsTypeConversion());
     }
     if (update == nullptr) {
       return;
@@ -416,8 +428,9 @@
   // wrap-around, or periodic can be combined with an invariant to yield a similar result.
   // Two linear or two polynomial inputs can be combined too. Other combinations fail.
   if (a != nullptr && b != nullptr) {
-    type_ = Narrowest(type_, Narrowest(a->type, b->type));
-    if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
+    if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) {
+      return nullptr;  // no transfer
+    } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(op, a, b);
     } else if ((a->induction_class == kLinear && b->induction_class == kLinear) ||
                (a->induction_class == kPolynomial && b->induction_class == kPolynomial)) {
@@ -452,8 +465,9 @@
   // Transfer over a unary negation: an invariant, linear, polynomial, geometric (mul),
   // wrap-around, or periodic input yields a similar but negated induction as result.
   if (a != nullptr) {
-    type_ = Narrowest(type_, a->type);
-    if (a->induction_class == kInvariant) {
+    if (IsNarrowingLinear(a)) {
+      return nullptr;  // no transfer
+    } else if (a->induction_class == kInvariant) {
       return CreateInvariantOp(kNeg, nullptr, a);
     } else if (a->induction_class != kGeometric || a->operation == kMul) {
       return CreateInduction(a->induction_class,
@@ -473,8 +487,9 @@
   // wrap-around, or periodic can be multiplied with an invariant to yield a similar
   // but multiplied result. Two non-invariant inputs cannot be multiplied, however.
   if (a != nullptr && b != nullptr) {
-    type_ = Narrowest(type_, Narrowest(a->type, b->type));
-    if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
+    if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) {
+      return nullptr;  // no transfer
+    } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(kMul, a, b);
     } else if (a->induction_class == kInvariant && (b->induction_class != kGeometric ||
                                                     b->operation == kMul)) {
@@ -497,17 +512,17 @@
   return nullptr;
 }
 
-HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferCnv(InductionInfo* a,
-                                                                         Primitive::Type from,
-                                                                         Primitive::Type to) {
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferConversion(
+    InductionInfo* a,
+    Primitive::Type from,
+    Primitive::Type to) {
   if (a != nullptr) {
-    // Allow narrowing conversion on linear induction in certain cases.
-    if (IsNarrowingIntegralConversion(from, to)) {
-      if (a->induction_class == kLinear) {
-        if (a->type == to || (a->type == from && IsNarrowingIntegralConversion(from, to))) {
-          return CreateInduction(kLinear, kNop, a->op_a, a->op_b, /*fetch*/ nullptr, to);
-        }
-      }
+    // Allow narrowing conversion on linear induction in certain cases:
+    // induction is already at narrow type, or can be made narrower.
+    if (IsNarrowingIntegralConversion(from, to) &&
+        a->induction_class == kLinear &&
+        (a->type == to || IsNarrowingIntegralConversion(a->type, to))) {
+      return CreateInduction(kLinear, kNop, a->op_a, a->op_b, /*fetch*/ nullptr, to);
     }
   }
   return nullptr;
@@ -700,16 +715,29 @@
   return nullptr;
 }
 
-HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) {
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveConversion(
+    HLoopInformation* loop,
+    HInstruction* entry_phi,
+    HTypeConversion* conversion) {
   Primitive::Type from = conversion->GetInputType();
   Primitive::Type to = conversion->GetResultType();
-  // A narrowing conversion is allowed within the cycle of a linear induction, provided that the
-  // narrowest encountered type is recorded with the induction to account for the precision loss.
-  if (IsNarrowingIntegralConversion(from, to)) {
-    auto it = cycle_.find(conversion->GetInput());
-    if (it != cycle_.end() && it->second->induction_class == kInvariant) {
-      type_ = Narrowest(type_, to);
-      return it->second;
+  // A narrowing conversion is allowed as *last* operation of the cycle of a linear induction
+  // with an initial value that fits the type, provided that the narrowest encountered type is
+  // recorded with the induction to account for the precision loss. The narrower induction does
+  // *not* transfer to any wider operations, however, since these may yield out-of-type values.
+  if (entry_phi->InputCount() == 2 && conversion == entry_phi->InputAt(1)) {
+    int64_t min = Primitive::MinValueOfIntegralType(to);
+    int64_t max = Primitive::MaxValueOfIntegralType(to);
+    int64_t value = 0;
+    InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
+    if (IsNarrowingIntegralConversion(from, to) &&
+        IsAtLeast(initial, &value) && value >= min &&
+        IsAtMost(initial, &value)  && value <= max) {
+      auto it = cycle_.find(conversion->GetInput());
+      if (it != cycle_.end() && it->second->induction_class == kInvariant) {
+        type_ = to;
+        return it->second;
+      }
     }
   }
   return nullptr;
@@ -729,7 +757,7 @@
       HCondition* condition = if_expr->AsCondition();
       InductionInfo* a = LookupInfo(loop, condition->InputAt(0));
       InductionInfo* b = LookupInfo(loop, condition->InputAt(1));
-      Primitive::Type type = condition->InputAt(0)->GetType();
+      Primitive::Type type = ImplicitConversion(condition->InputAt(0)->GetType());
       // Determine if the loop control uses a known sequence on an if-exit (X outside) or on
       // an if-iterate (X inside), expressed as if-iterate when passed into VisitCondition().
       if (a == nullptr || b == nullptr) {
@@ -901,8 +929,8 @@
                                      int64_t stride_value,
                                      Primitive::Type type,
                                      IfCondition cmp) {
-  const int64_t min = Primitive::MinValueOfIntegralType(type);
-  const int64_t max = Primitive::MaxValueOfIntegralType(type);
+  int64_t min = Primitive::MinValueOfIntegralType(type);
+  int64_t max = Primitive::MaxValueOfIntegralType(type);
   // Some rules under which it is certain at compile-time that the loop is finite.
   int64_t value;
   switch (cmp) {
@@ -938,8 +966,6 @@
     min++;
   }
   // Do both bounds fit the range?
-  // Note: The `value` is initialized to please valgrind - the compiler can reorder
-  // the return value check with the `value` check, b/27651442 .
   int64_t value = 0;
   return IsAtLeast(lower_expr, &value) && value >= min &&
          IsAtMost(lower_expr, &value)  && value <= max &&
@@ -1046,7 +1072,8 @@
       return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a);
     }
   }
-  return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
+  return new (graph_->GetArena()) InductionInfo(
+      kInvariant, op, a, b, nullptr, ImplicitConversion(b->type));
 }
 
 HInstruction* HInductionVarAnalysis::GetShiftConstant(HLoopInformation* loop,
@@ -1108,6 +1135,16 @@
   return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtLeast, value);
 }
 
+bool HInductionVarAnalysis::IsNarrowingLinear(InductionInfo* info) {
+  return info != nullptr &&
+      info->induction_class == kLinear &&
+      (info->type == Primitive::kPrimByte ||
+       info->type == Primitive::kPrimShort ||
+       info->type == Primitive::kPrimChar ||
+       (info->type == Primitive::kPrimInt && (info->op_a->type == Primitive::kPrimLong ||
+                                              info->op_b->type == Primitive::kPrimLong)));
+}
+
 bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1,
                                            InductionInfo* info2) {
   // Test structural equality only, without accounting for simplifications.
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 293aa70..39b39cd 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -167,7 +167,7 @@
   InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op);
   InductionInfo* TransferNeg(InductionInfo* a);
   InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b);
-  InductionInfo* TransferCnv(InductionInfo* a, Primitive::Type from, Primitive::Type to);
+  InductionInfo* TransferConversion(InductionInfo* a, Primitive::Type from, Primitive::Type to);
 
   // Solvers.
   InductionInfo* SolvePhi(HInstruction* phi, size_t input_index, size_t adjust_input_size);
@@ -191,7 +191,9 @@
                            HInstruction* entry_phi,
                            HInstruction* instruction,
                            int64_t oppositive_value);
-  InductionInfo* SolveCnv(HTypeConversion* conversion);
+  InductionInfo* SolveConversion(HLoopInformation* loop,
+                                 HInstruction* entry_phi,
+                                 HTypeConversion* conversion);
 
   // Trip count information.
   void VisitControl(HLoopInformation* loop);
@@ -235,6 +237,7 @@
   bool IsAtLeast(InductionInfo* info, /*out*/ int64_t* value);
 
   // Helpers.
+  static bool IsNarrowingLinear(InductionInfo* info);
   static bool InductionEqual(InductionInfo* info1, InductionInfo* info2);
   static std::string FetchToString(HInstruction* fetch);
   static std::string InductionToString(InductionInfo* info);
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index f52a1aa..82ee93d 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -174,6 +174,12 @@
       iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction2));
   }
 
+  // Returns true for narrowing linear induction.
+  bool IsNarrowingLinear(HInstruction* instruction) {
+    return HInductionVarAnalysis::IsNarrowingLinear(
+        iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction));
+  }
+
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
     graph_->BuildDominatorTree();
@@ -1066,16 +1072,20 @@
   // }
   BuildLoopNest(1);
   HInstruction* conv = InsertInstruction(
-      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0);
   HInstruction* store1 = InsertArrayStore(conv, 0);
   HInstruction* store2 = InsertArrayStore(basic_[0], 0);
   PerformInductionVarAnalysis();
 
-  // Regular int induction (i) is "transferred" over conversion into byte induction (k).
+  // Regular int induction (i) is transferred over conversion into byte induction (k).
   EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
   EXPECT_STREQ("((1) * i + (0)):PrimInt",  GetInductionInfo(store2->InputAt(1), 0).c_str());
   EXPECT_STREQ("((1) * i + (1)):PrimInt",  GetInductionInfo(increment_[0], 0).c_str());
 
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(store1->InputAt(1)));
+  EXPECT_FALSE(IsNarrowingLinear(store2->InputAt(1)));
+
   // Type matters!
   EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1)));
 
@@ -1093,7 +1103,7 @@
   // }
   BuildLoopNest(1);
   HInstruction* conv = InsertInstruction(
-      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0);
   HInstruction* store1 = InsertArrayStore(conv, 0);
   HInstruction* add = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
@@ -1101,11 +1111,86 @@
 
   PerformInductionVarAnalysis();
 
-  // Byte induction (k) is "transferred" over conversion into addition (k + 1).
-  // This means only values within byte range can be trusted (even though
-  // addition can jump out of the range of course).
+  // Byte induction (k) is detected, but it does not transfer over the addition,
+  // since this may yield out-of-type values.
   EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
-  EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(store2->InputAt(1), 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(store1->InputAt(1)));
+  EXPECT_FALSE(IsNarrowingLinear(store2->InputAt(1)));  // works for null
+}
+
+TEST_F(InductionVarAnalysisTest, ByteInduction) {
+  // Setup:
+  // k = -128;
+  // for (int i = 0; i < 100; i++) {
+  //   k = k + 1;
+  //   k = (byte) k;
+  // }
+  BuildLoopNest(1);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(graph_->GetIntConstant(-128));
+
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0);
+  k_header->AddInput(conv);
+  PerformInductionVarAnalysis();
+
+  // Byte induction (k) is detected, but it does not transfer over the addition,
+  // since this may yield out-of-type values.
+  EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(k_header, 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(add, 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(k_header));
+  EXPECT_FALSE(IsNarrowingLinear(add));  // works for null
+}
+
+TEST_F(InductionVarAnalysisTest, NoByteInduction1) {
+  // Setup:
+  // k = -129;  // does not fit!
+  // for (int i = 0; i < 100; i++) {
+  //   k = k + 1;
+  //   k = (byte) k;
+  // }
+  BuildLoopNest(1);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(graph_->GetIntConstant(-129));
+
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0);
+  k_header->AddInput(conv);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("", GetInductionInfo(k_header, 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(add, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, NoByteInduction2) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) k;   // conversion not done last!
+  //   k = k + 1;
+  // }
+  BuildLoopNest(1);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(constant0_);
+
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, k_header, kNoDexPc), 0);
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+  k_header->AddInput(add);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("", GetInductionInfo(k_header, 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(add, 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
@@ -1116,12 +1201,20 @@
   basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(127), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count.
   EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))", GetTripCount(0).c_str());
 }
@@ -1134,12 +1227,20 @@
   basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(128), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count undefined.
   EXPECT_STREQ("", GetTripCount(0).c_str());
 }
@@ -1152,13 +1253,20 @@
   basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(32767), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
-               GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count.
   EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))", GetTripCount(0).c_str());
 }
@@ -1171,13 +1279,20 @@
   basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(32768), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
-               GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count undefined.
   EXPECT_STREQ("", GetTripCount(0).c_str());
 }
@@ -1189,12 +1304,20 @@
   BuildLoopNest(1);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(65535), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count.
   EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))", GetTripCount(0).c_str());
 }
@@ -1206,12 +1329,20 @@
   BuildLoopNest(1);
   HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious();
   ifs->ReplaceInput(graph_->GetIntConstant(65536), 1);
-  HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1);
+  HInstruction* conv =
+      new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc);
   loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext());
   basic_[0]->ReplaceInput(conv, 1);
   PerformInductionVarAnalysis();
 
-  EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
+  // Recorded at the phi, but not transferred to increment.
+  EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str());
+  EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str());
+
+  // Narrowing detected.
+  EXPECT_TRUE(IsNarrowingLinear(basic_[0]));
+  EXPECT_FALSE(IsNarrowingLinear(increment_[0]));  // works for null
+
   // Trip-count undefined.
   EXPECT_STREQ("", GetTripCount(0).c_str());
 }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 7bcc384..d5c4c2f 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -169,8 +169,8 @@
     case Primitive::kPrimByte: {
       // Constants within range only.
       // TODO: maybe some room for improvement, like allowing widening conversions
-      const int32_t min = Primitive::MinValueOfIntegralType(type);
-      const int32_t max = Primitive::MaxValueOfIntegralType(type);
+      int32_t min = Primitive::MinValueOfIntegralType(type);
+      int32_t max = Primitive::MaxValueOfIntegralType(type);
       return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max)
           ? v
           : InductionVarRange::Value();
@@ -551,7 +551,7 @@
   int64_t b = 0;
   if (IsConstant(info->op_a->op_a, kExact, &a) && CanLongValueFitIntoInt(a) && a >= 0 &&
       IsConstant(info->op_a->op_b, kExact, &b) && CanLongValueFitIntoInt(b) && b >= 0) {
-    // Evaluate bounds on sum_i=0^m-1(a * i + b) + c with a,b >= 0 for known
+    // Evaluate bounds on sum_i=0^m-1(a * i + b) + c with a,b >= 0 for
     // maximum index value m as a * (m * (m-1)) / 2 + b * m + c.
     Value c = GetVal(info->op_b, trip, in_body, is_min);
     if (is_min) {
@@ -629,6 +629,7 @@
     }
   } else if (instruction->IsTypeConversion()) {
     // Since analysis is 32-bit (or narrower), chase beyond widening along the path.
+    // For example, this discovers the length in: for (long i = 0; i < a.length; i++);
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
@@ -843,7 +844,7 @@
 
 InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) {
-    const int32_t b = v1.b_constant + v2.b_constant;
+    int32_t b = v1.b_constant + v2.b_constant;
     if (v1.a_constant == 0) {
       return Value(v2.instruction, v2.a_constant, b);
     } else if (v2.a_constant == 0) {
@@ -857,7 +858,7 @@
 
 InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) {
-    const int32_t b = v1.b_constant - v2.b_constant;
+    int32_t b = v1.b_constant - v2.b_constant;
     if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) {
       return Value(v2.instruction, -v2.a_constant, b);
     } else if (v2.a_constant == 0) {
@@ -988,13 +989,16 @@
       IsConstant(trip->op_a, kExact, &m) && m >= 1) {
     // Evaluate bounds on sum_i=0^m-1(a * i + b) + c for known
     // maximum index value m as a * (m * (m-1)) / 2 + b * m + c.
-    // TODO: generalize
-    HInstruction* c_instr = nullptr;
-    if (GenerateCode(info->op_b, nullptr, graph, block, graph ? &c_instr : nullptr, false, false)) {
+    HInstruction* c = nullptr;
+    if (GenerateCode(info->op_b, nullptr, graph, block, graph ? &c : nullptr, false, false)) {
       if (graph != nullptr) {
+        Primitive::Type type = info->type;
         int64_t sum = a * ((m * (m - 1)) / 2) + b * m;
-        *result = Insert(block, new (graph->GetArena()) HAdd(info->type,
-                                                             graph->GetIntConstant(sum), c_instr));
+        if (type != Primitive::kPrimLong) {
+          sum = static_cast<int32_t>(sum);  // okay to truncate
+        }
+        *result =
+            Insert(block, new (graph->GetArena()) HAdd(type, graph->GetConstant(type, sum), c));
       }
       return true;
     }
@@ -1011,35 +1015,33 @@
   DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kGeometric);
   // Detect known base and trip count (always taken).
   int64_t f = 0;
-  int64_t t = 0;
-  if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &t) && t >= 1) {
+  int64_t m = 0;
+  if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
     if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
         GenerateCode(info->op_b, nullptr, graph, block, &opb, false, false)) {
-      // Compute f ^ t.
-      int64_t fpowt = IntPow(f, t);
+      // Compute f ^ m for known maximum index value m.
+      int64_t fpow = IntPow(f, m);
       if (graph != nullptr) {
-        DCHECK(info->type == Primitive::kPrimInt);  // due to codegen, generalize?
-        if (fpowt == 0) {
+        DCHECK(info->operation == HInductionVarAnalysis::kMul ||
+               info->operation == HInductionVarAnalysis::kDiv);
+        Primitive::Type type = info->type;
+        if (fpow == 0) {
           // Special case: repeated mul/div always yields zero.
-          *result = graph->GetIntConstant(0);
-        } else if (info->operation == HInductionVarAnalysis::kMul) {
-          // Last value multiplication: a * f ^ t + b.
-          HInstruction* mul = Insert(block,
-                                     new (graph->GetArena()) HMul(info->type,
-                                                                  opa,
-                                                                  graph->GetIntConstant(fpowt)));
-          *result = Insert(block, new (graph->GetArena()) HAdd(info->type, mul, opb));
+          *result = graph->GetConstant(type, 0);
         } else {
-          // Last value multiplication: a * f ^ -t + b.
-          DCHECK_EQ(info->operation, HInductionVarAnalysis::kDiv);
-          HInstruction* div = Insert(block,
-                                     new (graph->GetArena()) HDiv(info->type,
-                                                                  opa,
-                                                                  graph->GetIntConstant(fpowt),
-                                                                  kNoDexPc));
-          *result = Insert(block, new (graph->GetArena()) HAdd(info->type, div, opb));
+          // Last value: a * f ^ m + b or a * f ^ -m + b.
+          if (type != Primitive::kPrimLong) {
+            fpow = static_cast<int32_t>(fpow);  // okay to truncate
+          }
+          HInstruction* e = nullptr;
+          if (info->operation == HInductionVarAnalysis::kMul) {
+            e = new (graph->GetArena()) HMul(type, opa, graph->GetConstant(type, fpow));
+          } else {
+            e = new (graph->GetArena()) HDiv(type, opa, graph->GetConstant(type, fpow), kNoDexPc);
+          }
+          *result = Insert(block, new (graph->GetArena()) HAdd(type, Insert(block, e), opb));
         }
       }
       return true;
@@ -1060,12 +1062,11 @@
   for (; info->induction_class == HInductionVarAnalysis::kWrapAround;
        info = info->op_b, ++depth) {}
   // Handle wrap(x, wrap(.., y)) if trip count reaches an invariant at end.
-  // TODO: generalize
-  int64_t t = 0;
+  // TODO: generalize, but be careful to adjust the terminal.
+  int64_t m = 0;
   if (info->induction_class == HInductionVarAnalysis::kInvariant &&
-      IsConstant(trip->op_a, kExact, &t) && t >= depth &&
-      GenerateCode(info, nullptr, graph, block, result, false, false)) {
-    return true;
+      IsConstant(trip->op_a, kExact, &m) && m >= depth) {
+    return GenerateCode(info, nullptr, graph, block, result, false, false);
   }
   return false;
 }
@@ -1079,43 +1080,49 @@
   DCHECK(info != nullptr);
   DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic);
   // Count period.
-  int32_t period = 1;
+  int64_t period = 1;
   for (HInductionVarAnalysis::InductionInfo* p = info;
        p->induction_class == HInductionVarAnalysis::kPeriodic;
        p = p->op_b, ++period) {}
-  // Handle periodic(x, y) case for restricted types.
-  // TODO: generalize
-  if (period != 2 ||
-      trip->op_a->type != Primitive::kPrimInt ||
-      (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean)) {
-    return false;
+  // Handle any periodic(x, periodic(.., y)) for known maximum index value m.
+  int64_t m = 0;
+  if (IsConstant(trip->op_a, kExact, &m) && m >= 1) {
+    int64_t li = m % period;
+    for (int64_t i = 0; i < li; info = info->op_b, i++) {}
+    if (info->induction_class == HInductionVarAnalysis::kPeriodic) {
+      info = info->op_a;
+    }
+    return GenerateCode(info, nullptr, graph, block, result, false, false);
   }
-  HInstruction* x_instr = nullptr;
-  HInstruction* y_instr = nullptr;
-  HInstruction* trip_expr = nullptr;
-  if (GenerateCode(info->op_a, nullptr, graph, block, graph ? &x_instr   : nullptr, false, false) &&
-      GenerateCode(info->op_b, nullptr, graph, block, graph ? &y_instr   : nullptr, false, false) &&
-      GenerateCode(trip->op_a, nullptr, graph, block, graph ? &trip_expr : nullptr, false, false)) {
-    // During actual code generation (graph != nullptr),
-    // generate is_even ? x : y select instruction.
+  // Handle periodic(x, y) using even/odd-select on trip count. Enter trip count expression
+  // directly to obtain the maximum index value t even if taken test is needed.
+  HInstruction* x = nullptr;
+  HInstruction* y = nullptr;
+  HInstruction* t = nullptr;
+  if (period == 2 &&
+      GenerateCode(info->op_a, nullptr, graph, block, graph ? &x : nullptr, false, false) &&
+      GenerateCode(info->op_b, nullptr, graph, block, graph ? &y : nullptr, false, false) &&
+      GenerateCode(trip->op_a, nullptr, graph, block, graph ? &t : nullptr, false, false)) {
+    // During actual code generation (graph != nullptr), generate is_even ? x : y.
     if (graph != nullptr) {
-      HInstruction* is_even = Insert(block, new (graph->GetArena()) HEqual(
-          Insert(block, new (graph->GetArena()) HAnd(
-              Primitive::kPrimInt, trip_expr, graph->GetIntConstant(1))),
-          graph->GetIntConstant(0), kNoDexPc));
-      *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x_instr, y_instr, kNoDexPc));
+      Primitive::Type type = trip->type;
+      HInstruction* msk =
+          Insert(block, new (graph->GetArena()) HAnd(type, t, graph->GetConstant(type, 1)));
+      HInstruction* is_even =
+          Insert(block, new (graph->GetArena()) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc));
+      *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x, y, kNoDexPc));
     }
     // Guard select with taken test if needed.
     if (*needs_taken_test) {
-      HInstruction* taken_test = nullptr;
-      if (!GenerateCode(
-          trip->op_b, nullptr, graph, block, graph ? &taken_test : nullptr, false, false)) {
+      HInstruction* is_taken = nullptr;
+      if (GenerateCode(trip->op_b, nullptr, graph, block, graph ? &is_taken : nullptr, false, false)) {
+        if (graph != nullptr) {
+          *result = Insert(block, new (graph->GetArena()) HSelect(is_taken, *result, x, kNoDexPc));
+        }
+        *needs_taken_test = false;  // taken care of
+      } else {
         return false;
-      } else if (graph != nullptr) {
-         *result = Insert(block,
-                          new (graph->GetArena()) HSelect(taken_test, *result, x_instr, kNoDexPc));
       }
-      *needs_taken_test = false;  // taken care of
     }
     return true;
   }
@@ -1134,13 +1141,8 @@
     if (graph != nullptr && result == nullptr) {
       return true;
     }
-    // Verify type safety.
-    // TODO: generalize
-    Primitive::Type type = Primitive::kPrimInt;
-    if (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean) {
-      return false;
-    }
     // Handle current operation.
+    Primitive::Type type = info->type;
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
@@ -1214,15 +1216,15 @@
           case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               if (graph != nullptr) {
-                *result = graph->GetIntConstant(0);
+                *result = graph->GetConstant(type, 0);
               }
               return true;
             } else if (in_body) {
               if (GenerateCode(info->op_a, trip, graph, block, &opb, in_body, is_min)) {
                 if (graph != nullptr) {
-                  *result = Insert(block,
-                                   new (graph->GetArena())
-                                       HSub(type, opb, graph->GetIntConstant(1)));
+                  *result =
+                      Insert(block,
+                             new (graph->GetArena()) HSub(type, opb, graph->GetConstant(type, 1)));
                 }
                 return true;
               }
@@ -1236,26 +1238,31 @@
         // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
         // be restricted to a unit stride to avoid arithmetic wrap-around situations that
         // are harder to guard against. For a last value, requesting min/max based on any
-        // stride yields right value.
-        int64_t stride_value = 0;
-        if (IsConstant(info->op_a, kExact, &stride_value)) {
-          const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
-          if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-              GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-            if (graph != nullptr) {
-              HInstruction* oper;
-              if (stride_value == 1) {
-                oper = new (graph->GetArena()) HAdd(type, opa, opb);
-              } else if (stride_value == -1) {
-                oper = new (graph->GetArena()) HSub(type, opb, opa);
-              } else {
-                HInstruction* mul = new (graph->GetArena()) HMul(
-                    type, graph->GetIntConstant(stride_value), opa);
-                oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb);
+        // known stride yields right value. Always avoid any narrowing linear induction or
+        // any type mismatch between the linear induction and the trip count expression.
+        // TODO: careful runtime type conversions could generalize this latter restriction.
+        if (!HInductionVarAnalysis::IsNarrowingLinear(info) && trip->type == type) {
+          int64_t stride_value = 0;
+          if (IsConstant(info->op_a, kExact, &stride_value) &&
+              CanLongValueFitIntoInt(stride_value)) {
+            const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else if (stride_value == -1) {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
+                } else {
+                  HInstruction* mul =
+                      new (graph->GetArena()) HMul(type, graph->GetConstant(type, stride_value), opa);
+                  oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb);
+                }
+                *result = Insert(block, oper);
               }
-              *result = Insert(block, oper);
+              return true;
             }
-            return true;
           }
         }
         break;
@@ -1270,7 +1277,7 @@
         Value extreme = GetVal(info, trip, in_body, is_min);
         if (IsConstantValue(extreme)) {
           if (graph != nullptr) {
-            *result = graph->GetIntConstant(extreme.b_constant);
+            *result = graph->GetConstant(type, extreme.b_constant);
           }
           return true;
         }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fe4662a..d847879 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -480,13 +480,11 @@
   }
 
   // We successfully inlined, now add a guard.
-  bool is_referrer =
-      (GetMonomorphicType(classes) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
   AddTypeGuard(receiver,
                cursor,
                bb_cursor,
                class_index,
-               is_referrer,
+               GetMonomorphicType(classes),
                invoke_instruction,
                /* with_deoptimization */ true);
 
@@ -506,52 +504,62 @@
                            uint32_t dex_pc,
                            HInstruction* cursor,
                            HBasicBlock* bb_cursor) {
-  HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc);
-  HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual(
+  HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena())
+      HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
+  HInstruction* compare = new (graph_->GetArena()) HNotEqual(
       deopt_flag, graph_->GetIntConstant(0, dex_pc));
-  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc);
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
 
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
   } else {
     bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag);
-  bb_cursor->InsertInstructionAfter(deopt, should_deopt);
+  bb_cursor->InsertInstructionAfter(compare, deopt_flag);
+  bb_cursor->InsertInstructionAfter(deopt, compare);
+
+  // Add receiver as input to aid CHA guard optimization later.
+  deopt_flag->AddInput(invoke_instruction->InputAt(0));
+  DCHECK_EQ(deopt_flag->InputCount(), 1u);
   deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  outermost_graph_->IncrementNumberOfCHAGuards();
 }
 
 HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
                                      HInstruction* cursor,
                                      HBasicBlock* bb_cursor,
                                      dex::TypeIndex class_index,
-                                     bool is_referrer,
+                                     mirror::Class* klass,
                                      HInstruction* invoke_instruction,
                                      bool with_deoptimization) {
+  ScopedAssertNoThreadSuspension sants("Adding compiler type guard");
+
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
       class_linker, receiver, invoke_instruction->GetDexPc());
-
-  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  // Note that we will just compare the classes, so we don't need Java semantics access checks.
-  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
-  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
-                                                               class_index,
-                                                               caller_dex_file,
-                                                               is_referrer,
-                                                               invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true,
-                                                               /* is_in_boot_image */ false);
-
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(receiver_class, cursor);
   } else {
     bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
   }
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  bool is_referrer = (klass == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Note that the type index and the dex file are relative to the method this type guard is
+  // inlined into.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false);
   bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  // Sharpen after adding the instruction, as the sharpening may remove inputs.
+  HSharpening::SharpenClass(load_class, klass, handles_, codegen_, compiler_driver_);
+
+  // TODO: Extend reference type propagation to understand the guard.
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
   if (with_deoptimization) {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
@@ -604,7 +612,6 @@
       all_targets_inlined = false;
     } else {
       one_target_inlined = true;
-      bool is_referrer = (classes->Get(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
 
       // If we have inlined all targets before, and this receiver is the last seen,
       // we deoptimize instead of keeping the original invoke instruction.
@@ -616,8 +623,13 @@
         // We do not support HDeoptimize in OSR methods.
         deoptimize = false;
       }
-      HInstruction* compare = AddTypeGuard(
-          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      HInstruction* compare = AddTypeGuard(receiver,
+                                           cursor,
+                                           bb_cursor,
+                                           class_index,
+                                           classes->Get(i),
+                                           invoke_instruction,
+                                           deoptimize);
       if (deoptimize) {
         if (return_replacement != nullptr) {
           invoke_instruction->ReplaceWith(return_replacement);
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index ffebd97..0c64362 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -167,7 +167,7 @@
                              HInstruction* cursor,
                              HBasicBlock* bb_cursor,
                              dex::TypeIndex class_index,
-                             bool is_referrer,
+                             mirror::Class* klass,
                              HInstruction* invoke_instruction,
                              bool with_deoptimization)
     REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index b97581b..1ca3218 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -937,9 +937,7 @@
       outer_dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      needs_access_check,
-      /* is_in_dex_cache */ false,
-      /* is_in_boot_image */ false);
+      needs_access_check);
 
   AppendInstruction(load_class);
   HInstruction* cls = load_class;
@@ -1029,9 +1027,7 @@
         outer_dex_file,
         is_outer_class,
         dex_pc,
-        /*needs_access_check*/ false,
-        /* is_in_dex_cache */ false,
-        /* is_in_boot_image */ false);
+        /*needs_access_check*/ false);
     AppendInstruction(load_class);
     clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
     AppendInstruction(clinit_check);
@@ -1388,9 +1384,7 @@
                                                  outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
-                                                 /*needs_access_check*/ false,
-                                                 /* is_in_dex_cache */ false,
-                                                 /* is_in_boot_image */ false);
+                                                 /*needs_access_check*/ false);
   AppendInstruction(constant);
 
   HInstruction* cls = constant;
@@ -1664,9 +1658,7 @@
       dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      !can_access,
-      /* is_in_dex_cache */ false,
-      /* is_in_boot_image */ false);
+      !can_access);
   AppendInstruction(cls);
 
   TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
@@ -2656,9 +2648,7 @@
           *dex_file_,
           IsOutermostCompilingClass(type_index),
           dex_pc,
-          !can_access,
-          /* is_in_dex_cache */ false,
-          /* is_in_boot_image */ false));
+          !can_access));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 95551c8..641a5c9 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1509,7 +1509,7 @@
   SlowPathCodeARMVIXL* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
+    if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
         std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 1e946d6..b9e284f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1108,13 +1108,23 @@
   return HasEnvironment() ? environment_->Size() : 0;
 }
 
-void HPhi::AddInput(HInstruction* input) {
+void HVariableInputSizeInstruction::AddInput(HInstruction* input) {
   DCHECK(input->GetBlock() != nullptr);
   inputs_.push_back(HUserRecord<HInstruction*>(input));
   input->AddUseAt(this, inputs_.size() - 1);
 }
 
-void HPhi::RemoveInputAt(size_t index) {
+void HVariableInputSizeInstruction::InsertInputAt(size_t index, HInstruction* input) {
+  inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
+  input->AddUseAt(this, index);
+  // Update indexes in use nodes of inputs that have been pushed further back by the insert().
+  for (size_t i = index + 1u, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i - 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
+  }
+}
+
+void HVariableInputSizeInstruction::RemoveInputAt(size_t index) {
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
   // Update indexes in use nodes of inputs that have been pulled forward by the erase().
@@ -1347,7 +1357,9 @@
 void HInstruction::MoveBefore(HInstruction* cursor) {
   DCHECK(!IsPhi());
   DCHECK(!IsControlFlow());
-  DCHECK(CanBeMoved());
+  DCHECK(CanBeMoved() ||
+         // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
+         IsShouldDeoptimizeFlag());
   DCHECK(!cursor->IsPhi());
 
   next_->previous_ = previous_;
@@ -2386,26 +2398,6 @@
   return !opt.GetDoesNotNeedDexCache();
 }
 
-void HInvokeStaticOrDirect::InsertInputAt(size_t index, HInstruction* input) {
-  inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
-  input->AddUseAt(this, index);
-  // Update indexes in use nodes of inputs that have been pushed further back by the insert().
-  for (size_t i = index + 1u, e = inputs_.size(); i < e; ++i) {
-    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i - 1u);
-    inputs_[i].GetUseNode()->SetIndex(i);
-  }
-}
-
-void HInvokeStaticOrDirect::RemoveInputAt(size_t index) {
-  RemoveAsUserOfInput(index);
-  inputs_.erase(inputs_.begin() + index);
-  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
-  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
-    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
-    inputs_[i].GetUseNode()->SetIndex(i);
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 4a8cfcb..1f0c8e8 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -330,6 +330,7 @@
         invoke_type_(invoke_type),
         in_ssa_form_(false),
         should_generate_constructor_barrier_(should_generate_constructor_barrier),
+        number_of_cha_guards_(0),
         instruction_set_(instruction_set),
         cached_null_constant_(nullptr),
         cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
@@ -551,9 +552,7 @@
   }
 
   bool HasShouldDeoptimizeFlag() const {
-    // TODO: if all CHA guards can be eliminated, there is no need for the flag
-    // even if cha_single_implementation_list_ is not empty.
-    return !cha_single_implementation_list_.empty();
+    return number_of_cha_guards_ != 0;
   }
 
   bool HasTryCatch() const { return has_try_catch_; }
@@ -572,6 +571,10 @@
 
   ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; }
 
+  uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; }
+  void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
+  void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }
+
  private:
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -667,6 +670,10 @@
 
   const bool should_generate_constructor_barrier_;
 
+  // Number of CHA guards in the graph. Used to short-circuit the
+  // CHA guard optimization pass when there is no CHA guard left.
+  uint32_t number_of_cha_guards_;
+
   const InstructionSet instruction_set_;
 
   // Cached constants.
@@ -2347,6 +2354,32 @@
   DISALLOW_COPY_AND_ASSIGN(HBackwardInstructionIterator);
 };
 
+class HVariableInputSizeInstruction : public HInstruction {
+ public:
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
+
+  void AddInput(HInstruction* input);
+  void InsertInputAt(size_t index, HInstruction* input);
+  void RemoveInputAt(size_t index);
+
+ protected:
+  HVariableInputSizeInstruction(SideEffects side_effects,
+                                uint32_t dex_pc,
+                                ArenaAllocator* arena,
+                                size_t number_of_inputs,
+                                ArenaAllocKind kind)
+      : HInstruction(side_effects, dex_pc),
+        inputs_(number_of_inputs, arena->Adapter(kind)) {}
+
+  ArenaVector<HUserRecord<HInstruction*>> inputs_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVariableInputSizeInstruction);
+};
+
 template<size_t N>
 class HTemplateInstruction: public HInstruction {
  public:
@@ -2438,15 +2471,19 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
-class HPhi FINAL : public HInstruction {
+class HPhi FINAL : public HVariableInputSizeInstruction {
  public:
   HPhi(ArenaAllocator* arena,
        uint32_t reg_number,
        size_t number_of_inputs,
        Primitive::Type type,
        uint32_t dex_pc = kNoDexPc)
-      : HInstruction(SideEffects::None(), dex_pc),
-        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
+      : HVariableInputSizeInstruction(
+            SideEffects::None(),
+            dex_pc,
+            arena,
+            number_of_inputs,
+            kArenaAllocPhiInputs),
         reg_number_(reg_number) {
     SetPackedField<TypeField>(ToPhiType(type));
     DCHECK_NE(GetType(), Primitive::kPrimVoid);
@@ -2464,14 +2501,6 @@
 
   bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
 
-  using HInstruction::GetInputRecords;  // Keep the const version visible.
-  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
-    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
-  }
-
-  void AddInput(HInstruction* input);
-  void RemoveInputAt(size_t index);
-
   Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
   void SetType(Primitive::Type new_type) {
     // Make sure that only valid type changes occur. The following are allowed:
@@ -2527,7 +2556,6 @@
   static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
   using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
 
-  ArenaVector<HUserRecord<HInstruction*>> inputs_;
   const uint32_t reg_number_;
 
   DISALLOW_COPY_AND_ASSIGN(HPhi);
@@ -2904,14 +2932,20 @@
 // if it's true, starts to do deoptimization.
 // It has a 4-byte slot on stack.
 // TODO: allocate a register for this flag.
-class HShouldDeoptimizeFlag FINAL : public HExpression<0> {
+class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction {
  public:
-  // TODO: use SideEffects to aid eliminating some CHA guards.
-  explicit HShouldDeoptimizeFlag(uint32_t dex_pc)
-      : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+  // CHA guards are only optimized in a separate pass, and this instruction has no side
+  // effects with regard to other passes.
+  HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc)
+      : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) {
   }
 
-  // We don't eliminate CHA guards yet.
+  Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; }
+
+  // We do all CHA guard elimination/motion in a single pass, after which there is no
+  // further guard elimination/motion since a guard might have been used for justification
+  // of the elimination of another guard. Therefore, we pretend this guard cannot be moved
+  // to avoid other optimizations trying to move it.
   bool CanBeMoved() const OVERRIDE { return false; }
 
   DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
@@ -3791,15 +3825,10 @@
   kCanThrow  // Intrinsic may throw exceptions.
 };
 
-class HInvoke : public HInstruction {
+class HInvoke : public HVariableInputSizeInstruction {
  public:
   bool NeedsEnvironment() const OVERRIDE;
 
-  using HInstruction::GetInputRecords;  // Keep the const version visible.
-  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
-    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
-  }
-
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
   }
@@ -3878,12 +3907,14 @@
           uint32_t dex_method_index,
           ArtMethod* resolved_method,
           InvokeType invoke_type)
-    : HInstruction(
-          SideEffects::AllExceptGCDependency(), dex_pc),  // Assume write/read on all fields/arrays.
+    : HVariableInputSizeInstruction(
+          SideEffects::AllExceptGCDependency(),  // Assume write/read on all fields/arrays.
+          dex_pc,
+          arena,
+          number_of_arguments + number_of_other_inputs,
+          kArenaAllocInvokeInputs),
       number_of_arguments_(number_of_arguments),
       resolved_method_(resolved_method),
-      inputs_(number_of_arguments + number_of_other_inputs,
-              arena->Adapter(kArenaAllocInvokeInputs)),
       dex_method_index_(dex_method_index),
       intrinsic_(Intrinsics::kNone),
       intrinsic_optimizations_(0) {
@@ -3894,7 +3925,6 @@
 
   uint32_t number_of_arguments_;
   ArtMethod* const resolved_method_;
-  ArenaVector<HUserRecord<HInstruction*>> inputs_;
   const uint32_t dex_method_index_;
   Intrinsics intrinsic_;
 
@@ -4184,10 +4214,6 @@
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
- protected:
-  void InsertInputAt(size_t index, HInstruction* input);
-  void RemoveInputAt(size_t index);
-
  private:
   static constexpr size_t kFieldClinitCheckRequirement = kNumberOfInvokePackedBits;
   static constexpr size_t kFieldClinitCheckRequirementSize =
@@ -5519,9 +5545,7 @@
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
-             bool needs_access_check,
-             bool is_in_dex_cache,
-             bool is_in_boot_image)
+             bool needs_access_check)
       : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
         special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
@@ -5534,8 +5558,8 @@
     SetPackedField<LoadKindField>(
         is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
-    SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
-    SetPackedFlag<kFlagIsInBootImage>(is_in_boot_image);
+    SetPackedFlag<kFlagIsInDexCache>(false);
+    SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
   }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ba7012a..4bf5b08 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -54,6 +54,7 @@
 #include "base/timing_logger.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
+#include "cha_guard_optimization.h"
 #include "code_generator.h"
 #include "compiled_method.h"
 #include "compiler.h"
@@ -517,6 +518,8 @@
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
     return new (arena) HLoopOptimization(graph, most_recent_induction);
+  } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
+    return new (arena) CHAGuardOptimization(graph);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
     return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -779,6 +782,7 @@
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
+  CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -807,6 +811,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     lse,
+    cha_guard,
     dce3,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
@@ -1193,7 +1198,7 @@
   }
   uint8_t* stack_map_data = nullptr;
   uint8_t* roots_data = nullptr;
-  code_cache->ReserveData(
+  uint32_t data_size = code_cache->ReserveData(
       self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
@@ -1212,6 +1217,7 @@
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
+      data_size,
       osr,
       roots,
       codegen->GetGraph()->HasShouldDeoptimizeFlag(),
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 91efb80..91826cf 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -140,6 +140,25 @@
 }
 
 void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
+  // Fetches the resolved class (possibly null) from the dex cache of the load's dex
+  // file and hands it to SharpenClass(), which picks the load kind.
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  const DexFile& dex_file = load_class->GetDexFile();
+  Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+      ? compilation_unit_.GetDexCache()
+      : hs.NewHandle(
+            Runtime::Current()->GetClassLinker()->FindDexCache(soa.Self(), dex_file));
+  mirror::Class* resolved = dex_cache->GetResolvedType(load_class->GetTypeIndex());
+  SharpenClass(load_class, resolved, handles_, codegen_, compiler_driver_);
+}
+
+void HSharpening::SharpenClass(HLoadClass* load_class,
+                               mirror::Class* klass,
+                               VariableSizedHandleScope* handles,
+                               CodeGenerator* codegen,
+                               CompilerDriver* compiler_driver) {
+  ScopedAssertNoThreadSuspension sants("Sharpening class in compiler");
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
       << load_class->GetLoadKind();
@@ -153,69 +172,60 @@
   bool is_in_boot_image = false;
   HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1);
   uint64_t address = 0u;  // Class or dex cache element address.
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    Runtime* runtime = Runtime::Current();
-    ClassLinker* class_linker = runtime->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
-        ? compilation_unit_.GetDexCache()
-        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
-    mirror::Class* klass = dex_cache->GetResolvedType(type_index);
-    if (codegen_->GetCompilerOptions().IsBootImage()) {
-      // Compiling boot image. Check if the class is a boot image class.
-      DCHECK(!runtime->UseJitCompilation());
-      if (!compiler_driver_->GetSupportBootImageFixup()) {
-        // MIPS64 or compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-      } else if ((klass != nullptr) && compiler_driver_->IsImageClass(
-          dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
-        is_in_boot_image = true;
-        is_in_dex_cache = true;
-        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
-            ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
-            : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
-      } else {
-        // Not a boot image class. We must go through the dex cache.
-        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
-        desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
-      }
+  Runtime* runtime = Runtime::Current();
+  if (codegen->GetCompilerOptions().IsBootImage()) {
+    // Compiling boot image. Check if the class is a boot image class.
+    DCHECK(!runtime->UseJitCompilation());
+    if (!compiler_driver->GetSupportBootImageFixup()) {
+      // MIPS64 or compiler_driver_test. Do not sharpen.
+      desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    } else if ((klass != nullptr) && compiler_driver->IsImageClass(
+        dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+      is_in_boot_image = true;
+      is_in_dex_cache = true;
+      desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
+          ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+          : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
     } else {
-      is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
-      if (runtime->UseJitCompilation()) {
-        // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
-        // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-        is_in_dex_cache = (klass != nullptr);
-        if (is_in_boot_image) {
-          // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
-          desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
-          address = reinterpret_cast64<uint64_t>(klass);
-        } else if (is_in_dex_cache) {
-          desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
-          // We store in the address field the location of the stack reference maintained
-          // by the handle. We do this now so that the code generation does not need to figure
-          // out which class loader to use.
-          address = reinterpret_cast<uint64_t>(handles_->NewHandle(klass).GetReference());
-        } else {
-          // Class not loaded yet. This happens when the dex code requesting
-          // this `HLoadClass` hasn't been executed in the interpreter.
-          // Fallback to the dex cache.
-          // TODO(ngeoffray): Generate HDeoptimize instead.
-          desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-        }
-      } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) {
-        // AOT app compilation. Check if the class is in the boot image.
+      // Not a boot image class. We must go through the dex cache.
+      DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
+      desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+    }
+  } else {
+    is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
+    if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
+      is_in_dex_cache = (klass != nullptr);
+      if (is_in_boot_image) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
         desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(klass);
+      } else if (is_in_dex_cache) {
+        desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
+        // We store in the address field the location of the stack reference maintained
+        // by the handle. We do this now so that the code generation does not need to figure
+        // out which class loader to use.
+        address = reinterpret_cast<uint64_t>(handles->NewHandle(klass).GetReference());
       } else {
-        // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
-        // Use PC-relative load from the dex cache if the dex file belongs
-        // to the oat file that we're currently compiling.
-        desired_load_kind =
-            ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile())
-                ? HLoadClass::LoadKind::kDexCachePcRelative
-                : HLoadClass::LoadKind::kDexCacheViaMethod;
+        // Class not loaded yet. This happens when the dex code requesting
+        // this `HLoadClass` hasn't been executed in the interpreter.
+        // Fallback to the dex cache.
+        // TODO(ngeoffray): Generate HDeoptimize instead.
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
       }
+    } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
+      // AOT app compilation. Check if the class is in the boot image.
+      desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+      address = reinterpret_cast64<uint64_t>(klass);
+    } else {
+      // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
+      // Use PC-relative load from the dex cache if the dex file belongs
+      // to the oat file that we're currently compiling.
+      desired_load_kind =
+          ContainsElement(compiler_driver->GetDexFilesForOatFile(), &load_class->GetDexFile())
+              ? HLoadClass::LoadKind::kDexCachePcRelative
+              : HLoadClass::LoadKind::kDexCacheViaMethod;
     }
   }
   DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1));
@@ -241,7 +251,7 @@
     load_class->MarkInDexCache();
   }
 
-  HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind);
+  HLoadClass::LoadKind load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind);
   switch (load_kind) {
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
@@ -254,7 +264,7 @@
       load_class->SetLoadKindWithAddress(load_kind, address);
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative: {
-      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      PointerSize pointer_size = InstructionSetPointerSize(codegen->GetInstructionSet());
       DexCacheArraysLayout layout(pointer_size, &dex_file);
       size_t element_index = layout.TypeOffset(type_index);
       load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 7418954..ae5ccb3 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,14 @@
 
   static constexpr const char* kSharpeningPassName = "sharpening";
 
+  // Used internally but also by the inliner.
+  static void SharpenClass(HLoadClass* load_class,
+                           mirror::Class* klass,
+                           VariableSizedHandleScope* handles,
+                           CodeGenerator* codegen,
+                           CompilerDriver* compiler_driver)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
   void ProcessLoadClass(HLoadClass* load_class);
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 76a94e8..453c90a 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -479,6 +479,5 @@
   MacroAssembler::B(cond, label);
 }
 
-
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 17cf106..5661249 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -205,6 +205,15 @@
                        int32_t value,
                        vixl32::Condition cond = vixl32::al);
 
+  // Heap-allocates a literal flagged kPlacedWhenUsed and kDeletedOnPoolDestruction;
+  // per those flags the literal pool, not the caller, is expected to delete it.
+  template <typename T>
+  vixl::aarch32::Literal<T>* CreateLiteralDestroyedWithPool(T value) {
+    return new vixl::aarch32::Literal<T>(value,
+                                         vixl32::RawLiteral::kPlacedWhenUsed,
+                                         vixl32::RawLiteral::kDeletedOnPoolDestruction);
+  }
+
  private:
   // VIXL assembler.
   ArmVIXLMacroAssembler vixl_masm_;
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 4e64f13..d07c047 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -592,7 +592,9 @@
     ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                              vixl32::kMaxInstructionSizeInBytes,
                              CodeBufferCheckScope::kMaximumSize);
-    ___ b(ne, Narrow, exception_blocks_.back()->Entry());
+    vixl32::Label* label = exception_blocks_.back()->Entry();
+    ___ b(ne, Narrow, label);
+    ___ AddBranchLabel(label);
   }
   // TODO: think about using CBNZ here.
 }
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 50a1d9f..4e9b619 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1717,6 +1717,11 @@
 
   __ ExceptionPoll(scratch_register, 0);
 
+  // Push the target out of range of branch emitted by ExceptionPoll.
+  for (int i = 0; i < 64; i++) {
+    __ Store(FrameOffset(2047), scratch_register, 4);
+  }
+
   __ DecreaseFrameSize(4096);
   __ DecreaseFrameSize(32);
   __ RemoveFrame(frame_size, callee_save_regs);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 69e1d8f..b16d99a 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5458,94 +5458,160 @@
 };
 
 const char* const VixlJniHelpersResults[] = {
-  "   0:  e92d 4de0   stmdb sp!, {r5, r6, r7, r8, sl, fp, lr}\n",
-  "   4:  ed2d 8a10   vpush {s16-s31}\n",
-  "   8:  b089        sub sp, #36 ; 0x24\n",
-  "   a:  9000        str r0, [sp, #0]\n",
-  "   c:  9121        str r1, [sp, #132]  ; 0x84\n",
-  "   e:  ed8d 0a22   vstr  s0, [sp, #136]  ; 0x88\n",
-  "  12:  9223        str r2, [sp, #140]  ; 0x8c\n",
-  "  14:  9324        str r3, [sp, #144]  ; 0x90\n",
-  "  16:  b088        sub sp, #32\n",
-  "  18:  f5ad 5d80   sub.w sp, sp, #4096 ; 0x1000\n",
-  "  1c:  9808        ldr r0, [sp, #32]\n",
-  "  1e:  981f        ldr r0, [sp, #124]  ; 0x7c\n",
-  "  20:  9821        ldr r0, [sp, #132]  ; 0x84\n",
-  "  22:  98ff        ldr r0, [sp, #1020] ; 0x3fc\n",
-  "  24:  f8dd 0400   ldr.w r0, [sp, #1024] ; 0x400\n",
-  "  28:  f8dd cffc   ldr.w ip, [sp, #4092] ; 0xffc\n",
-  "  2c:  f50d 5c80   add.w ip, sp, #4096 ; 0x1000\n",
-  "  30:  f8dc c000   ldr.w ip, [ip]\n",
-  "  34:  f8d9 c200   ldr.w ip, [r9, #512]  ; 0x200\n",
-  "  38:  f8dc 0080   ldr.w r0, [ip, #128]  ; 0x80\n",
-  "  3c:  9008        str r0, [sp, #32]\n",
-  "  3e:  901f        str r0, [sp, #124]  ; 0x7c\n",
-  "  40:  9021        str r0, [sp, #132]  ; 0x84\n",
-  "  42:  90ff        str r0, [sp, #1020] ; 0x3fc\n",
-  "  44:  f8cd 0400   str.w r0, [sp, #1024] ; 0x400\n",
-  "  48:  f8cd cffc   str.w ip, [sp, #4092] ; 0xffc\n",
-  "  4c:  f84d 5d04   str.w r5, [sp, #-4]!\n",
-  "  50:  f50d 5580   add.w r5, sp, #4096 ; 0x1000\n",
-  "  54:  f8c5 c004   str.w ip, [r5, #4]\n",
-  "  58:  f85d 5b04   ldr.w r5, [sp], #4\n",
-  "  5c:  f04f 0cff   mov.w ip, #255  ; 0xff\n",
-  "  60:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  64:  f06f 4c7f   mvn.w ip, #4278190080 ; 0xff000000\n",
-  "  68:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  6c:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  70:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  74:  900c        str r0, [sp, #48] ; 0x30\n",
-  "  76:  f8dd c030   ldr.w ip, [sp, #48] ; 0x30\n",
-  "  7a:  f8cd c034   str.w ip, [sp, #52] ; 0x34\n",
-  "  7e:  f50d 5c80   add.w ip, sp, #4096 ; 0x1000\n",
-  "  82:  f8c9 c200   str.w ip, [r9, #512]  ; 0x200\n",
-  "  86:  f8c9 d200   str.w sp, [r9, #512]  ; 0x200\n",
-  "  8a:  f8d0 c030   ldr.w ip, [r0, #48] ; 0x30\n",
-  "  8e:  47e0        blx ip\n",
-  "  90:  f8dd c02c   ldr.w ip, [sp, #44] ; 0x2c\n",
-  "  94:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  98:  f8d9 c200   ldr.w ip, [r9, #512]  ; 0x200\n",
-  "  9c:  f8cd c02c   str.w ip, [sp, #44] ; 0x2c\n",
-  "  a0:  f8dd c02c   ldr.w ip, [sp, #44] ; 0x2c\n",
-  "  a4:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  a8:  4648        mov r0, r9\n",
-  "  aa:  f8cd 9030   str.w r9, [sp, #48] ; 0x30\n",
-  "  ae:  4684        mov ip, r0\n",
-  "  b0:  f1bc 0f00   cmp.w ip, #0\n",
-  "  b4:  bf18        it  ne\n",
-  "  b6:  f10d 0c30   addne.w ip, sp, #48 ; 0x30\n",
-  "  ba:  f10d 0c30   add.w ip, sp, #48 ; 0x30\n",
-  "  be:  f1bc 0f00   cmp.w ip, #0\n",
-  "  c2:  bf0c        ite eq\n",
-  "  c4:  2000        moveq r0, #0\n",
-  "  c6:  a80c        addne r0, sp, #48 ; 0x30\n",
-  "  c8:  f8dd c040   ldr.w ip, [sp, #64] ; 0x40\n",
-  "  cc:  f1bc 0f00   cmp.w ip, #0\n",
-  "  d0:  bf18        it  ne\n",
-  "  d2:  f10d 0c40   addne.w ip, sp, #64 ; 0x40\n",
-  "  d6:  f8cd c030   str.w ip, [sp, #48] ; 0x30\n",
-  "  da:  f1bc 0f00   cmp.w ip, #0\n",
-  "  de:  bf0c        ite eq\n",
-  "  e0:  2000        moveq r0, #0\n",
-  "  e2:  4668        movne r0, sp\n",
-  "  e4:  f1bc 0f00   cmp.w ip, #0\n",
-  "  e8:  bf0c        ite eq\n",
-  "  ea:  2000        moveq r0, #0\n",
-  "  ec:  f20d 4001   addwne  r0, sp, #1025 ; 0x401\n",
-  "  f0:  f1bc 0f00   cmp.w ip, #0\n",
-  "  f4:  bf18        it  ne\n",
-  "  f6:  f20d 4c01   addwne  ip, sp, #1025 ; 0x401\n",
-  "  fa:  f8d9 c084   ldr.w ip, [r9, #132]  ; 0x84\n",
-  "  fe:  f1bc 0f00   cmp.w ip, #0\n",
-  " 102:  d107        bne.n 114 <VixlJniHelpers+0x114>\n",
-  " 104:  f50d 5d80   add.w sp, sp, #4096 ; 0x1000\n",
-  " 108:  b008        add sp, #32\n",
-  " 10a:  b009        add sp, #36 ; 0x24\n",
-  " 10c:  ecbd 8a10   vpop  {s16-s31}\n",
-  " 110:  e8bd 8de0   ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
-  " 114:  4660        mov r0, ip\n",
-  " 116:  f8d9 c2b0   ldr.w ip, [r9, #688]  ; 0x2b0\n",
-  " 11a:  47e0        blx ip\n",
+  "   0:	e92d 4de0 	stmdb	sp!, {r5, r6, r7, r8, sl, fp, lr}\n",
+  "   4:	ed2d 8a10 	vpush	{s16-s31}\n",
+  "   8:	b089      	sub	sp, #36	; 0x24\n",
+  "   a:	9000      	str	r0, [sp, #0]\n",
+  "   c:	9121      	str	r1, [sp, #132]	; 0x84\n",
+  "   e:	ed8d 0a22 	vstr	s0, [sp, #136]	; 0x88\n",
+  "  12:	9223      	str	r2, [sp, #140]	; 0x8c\n",
+  "  14:	9324      	str	r3, [sp, #144]	; 0x90\n",
+  "  16:	b088      	sub	sp, #32\n",
+  "  18:	f5ad 5d80 	sub.w	sp, sp, #4096	; 0x1000\n",
+  "  1c:	9808      	ldr	r0, [sp, #32]\n",
+  "  1e:	981f      	ldr	r0, [sp, #124]	; 0x7c\n",
+  "  20:	9821      	ldr	r0, [sp, #132]	; 0x84\n",
+  "  22:	98ff      	ldr	r0, [sp, #1020]	; 0x3fc\n",
+  "  24:	f8dd 0400 	ldr.w	r0, [sp, #1024]	; 0x400\n",
+  "  28:	f8dd cffc 	ldr.w	ip, [sp, #4092]	; 0xffc\n",
+  "  2c:	f50d 5c80 	add.w	ip, sp, #4096	; 0x1000\n",
+  "  30:	f8dc c000 	ldr.w	ip, [ip]\n",
+  "  34:	f8d9 c200 	ldr.w	ip, [r9, #512]	; 0x200\n",
+  "  38:	f8dc 0080 	ldr.w	r0, [ip, #128]	; 0x80\n",
+  "  3c:	9008      	str	r0, [sp, #32]\n",
+  "  3e:	901f      	str	r0, [sp, #124]	; 0x7c\n",
+  "  40:	9021      	str	r0, [sp, #132]	; 0x84\n",
+  "  42:	90ff      	str	r0, [sp, #1020]	; 0x3fc\n",
+  "  44:	f8cd 0400 	str.w	r0, [sp, #1024]	; 0x400\n",
+  "  48:	f8cd cffc 	str.w	ip, [sp, #4092]	; 0xffc\n",
+  "  4c:	f84d 5d04 	str.w	r5, [sp, #-4]!\n",
+  "  50:	f50d 5580 	add.w	r5, sp, #4096	; 0x1000\n",
+  "  54:	f8c5 c004 	str.w	ip, [r5, #4]\n",
+  "  58:	f85d 5b04 	ldr.w	r5, [sp], #4\n",
+  "  5c:	f04f 0cff 	mov.w	ip, #255	; 0xff\n",
+  "  60:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  64:	f06f 4c7f 	mvn.w	ip, #4278190080	; 0xff000000\n",
+  "  68:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  6c:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  70:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  74:	900c      	str	r0, [sp, #48]	; 0x30\n",
+  "  76:	f8dd c030 	ldr.w	ip, [sp, #48]	; 0x30\n",
+  "  7a:	f8cd c034 	str.w	ip, [sp, #52]	; 0x34\n",
+  "  7e:	f50d 5c80 	add.w	ip, sp, #4096	; 0x1000\n",
+  "  82:	f8c9 c200 	str.w	ip, [r9, #512]	; 0x200\n",
+  "  86:	f8c9 d200 	str.w	sp, [r9, #512]	; 0x200\n",
+  "  8a:	f8d0 c030 	ldr.w	ip, [r0, #48]	; 0x30\n",
+  "  8e:	47e0      	blx	ip\n",
+  "  90:	f8dd c02c 	ldr.w	ip, [sp, #44]	; 0x2c\n",
+  "  94:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  98:	f8d9 c200 	ldr.w	ip, [r9, #512]	; 0x200\n",
+  "  9c:	f8cd c02c 	str.w	ip, [sp, #44]	; 0x2c\n",
+  "  a0:	f8dd c02c 	ldr.w	ip, [sp, #44]	; 0x2c\n",
+  "  a4:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  a8:	4648      	mov	r0, r9\n",
+  "  aa:	f8cd 9030 	str.w	r9, [sp, #48]	; 0x30\n",
+  "  ae:	4684      	mov	ip, r0\n",
+  "  b0:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  b4:	bf18      	it	ne\n",
+  "  b6:	f10d 0c30 	addne.w	ip, sp, #48	; 0x30\n",
+  "  ba:	f10d 0c30 	add.w	ip, sp, #48	; 0x30\n",
+  "  be:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  c2:	bf0c      	ite	eq\n",
+  "  c4:	2000      	moveq	r0, #0\n",
+  "  c6:	a80c      	addne	r0, sp, #48	; 0x30\n",
+  "  c8:	f8dd c040 	ldr.w	ip, [sp, #64]	; 0x40\n",
+  "  cc:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  d0:	bf18      	it	ne\n",
+  "  d2:	f10d 0c40 	addne.w	ip, sp, #64	; 0x40\n",
+  "  d6:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
+  "  da:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  de:	bf0c      	ite	eq\n",
+  "  e0:	2000      	moveq	r0, #0\n",
+  "  e2:	4668      	movne	r0, sp\n",
+  "  e4:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  e8:	bf0c      	ite	eq\n",
+  "  ea:	2000      	moveq	r0, #0\n",
+  "  ec:	f20d 4001 	addwne	r0, sp, #1025	; 0x401\n",
+  "  f0:	f1bc 0f00 	cmp.w	ip, #0\n",
+  "  f4:	bf18      	it	ne\n",
+  "  f6:	f20d 4c01 	addwne	ip, sp, #1025	; 0x401\n",
+  "  fa:	f8d9 c084 	ldr.w	ip, [r9, #132]	; 0x84\n",
+  "  fe:	f1bc 0f00 	cmp.w	ip, #0\n",
+  " 102:	d16f      	bne.n	1e4 <VixlJniHelpers+0x1e4>\n",
+  " 104:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 108:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 10c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 110:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 114:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 118:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 11c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 120:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 124:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 128:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 12c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 130:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 134:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 138:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 13c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 140:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 144:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 148:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 14c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 150:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 154:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 158:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 15c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 160:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 164:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 168:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 16c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 170:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 174:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 178:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 17c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 180:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 184:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 188:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 18c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 190:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 194:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 198:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 19c:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1a0:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1a4:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1a8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1ac:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1b0:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1b4:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1b8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1bc:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1c0:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1c4:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1c8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1cc:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1d0:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1d4:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1d8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1dc:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1e0:	f000 b802 	b.w	1e8 <VixlJniHelpers+0x1e8>\n",
+  " 1e4:	f000 b81a 	b.w	21c <VixlJniHelpers+0x21c>\n",
+  " 1e8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1ec:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1f0:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1f4:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1f8:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 1fc:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 200:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 204:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 208:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
+  " 20c:	f50d 5d80 	add.w	sp, sp, #4096	; 0x1000\n",
+  " 210:	b008      	add	sp, #32\n",
+  " 212:	b009      	add	sp, #36	; 0x24\n",
+  " 214:	ecbd 8a10 	vpop	{s16-s31}\n",
+  " 218:	e8bd 8de0 	ldmia.w	sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
+  " 21c:	4660      	mov	r0, ip\n",
+  " 21e:	f8d9 c2b0 	ldr.w	ip, [r9, #688]	; 0x2b0\n",
+  " 222:	47e0      	blx	ip\n",
   nullptr
 };
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 5ef1f06..e4972da 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -175,19 +175,19 @@
 #define MIRROR_CLASS_IF_TABLE_OFFSET (16 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_IF_TABLE_OFFSET,
             art::mirror::Class::IfTableOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (56 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (88 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET (92 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET,
             art::mirror::Class::ObjectSizeAllocFastPathOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
             art::mirror::Class::PrimitiveTypeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 5cdf671..61e0aab 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -83,18 +83,19 @@
   "GraphChecker ",
   "Verifier     ",
   "CallingConv  ",
+  "CHA          ",
 };
 
 template <bool kCount>
 ArenaAllocatorStatsImpl<kCount>::ArenaAllocatorStatsImpl()
-    : num_allocations_(0u) {
-  std::fill_n(alloc_stats_, arraysize(alloc_stats_), 0u);
+    : num_allocations_(0u),
+      alloc_stats_(kNumArenaAllocKinds, 0u) {
 }
 
 template <bool kCount>
 void ArenaAllocatorStatsImpl<kCount>::Copy(const ArenaAllocatorStatsImpl& other) {
   num_allocations_ = other.num_allocations_;
-  std::copy(other.alloc_stats_, other.alloc_stats_ + arraysize(alloc_stats_), alloc_stats_);
+  std::copy_n(other.alloc_stats_.begin(), kNumArenaAllocKinds, alloc_stats_.begin());
 }
 
 template <bool kCount>
@@ -111,7 +112,7 @@
 template <bool kCount>
 size_t ArenaAllocatorStatsImpl<kCount>::BytesAllocated() const {
   const size_t init = 0u;  // Initial value of the correct type.
-  return std::accumulate(alloc_stats_, alloc_stats_ + arraysize(alloc_stats_), init);
+  return std::accumulate(alloc_stats_.begin(), alloc_stats_.end(), init);
 }
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 2feb28a..6c764cb 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -21,6 +21,7 @@
 #include <stddef.h>
 
 #include "base/bit_utils.h"
+#include "base/dchecked_vector.h"
 #include "base/memory_tool.h"
 #include "debug_stack.h"
 #include "macros.h"
@@ -132,8 +133,7 @@
 
  private:
   size_t num_allocations_;
-  // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL.
-  size_t alloc_stats_[kNumArenaAllocKinds];  // Bytes used by various allocation kinds.
+  dchecked_vector<size_t> alloc_stats_;  // Bytes used by various allocation kinds.
 
   static const char* const kAllocNames[];
 };
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 213986a..a11257f 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -73,7 +73,7 @@
   // MethodVerifier refuses methods with string_idx out of bounds.
   DCHECK_LT(string_idx.index_, declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string =
-        mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
+        mirror::StringDexCachePair::Lookup(declaring_class->GetDexCache()->GetStrings(),
                                            string_idx.index_,
                                            mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string == nullptr)) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 92da9b5..5b8d4e4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1405,39 +1405,20 @@
   return true;
 }
 
-// Update the class loader and resolved string dex cache array of classes. Should only be used on
-// classes in the image space.
-class UpdateClassLoaderAndResolvedStringsVisitor {
+// Update the class loader. Should only be used on classes in the image space.
+class UpdateClassLoaderVisitor {
  public:
-  UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
-                                             ObjPtr<mirror::ClassLoader> class_loader,
-                                             bool forward_strings)
+  UpdateClassLoaderVisitor(gc::space::ImageSpace* space, ObjPtr<mirror::ClassLoader> class_loader)
       : space_(space),
-        class_loader_(class_loader),
-        forward_strings_(forward_strings) {}
+        class_loader_(class_loader) {}
 
   bool operator()(ObjPtr<mirror::Class> klass) const REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (forward_strings_) {
-      mirror::StringDexCacheType* strings = klass->GetDexCacheStrings();
-      if (strings != nullptr) {
-        DCHECK(
-            space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
-                reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
-            << "String dex cache array for " << klass->PrettyClass() << " is not in app image";
-        // Dex caches have already been updated, so take the strings pointer from there.
-        mirror::StringDexCacheType* new_strings = klass->GetDexCache()->GetStrings();
-        DCHECK_NE(strings, new_strings);
-        klass->SetDexCacheStrings(new_strings);
-      }
-    }
-    // Finally, update class loader.
     klass->SetClassLoader(class_loader_);
     return true;
   }
 
   gc::space::ImageSpace* const space_;
   ObjPtr<mirror::ClassLoader> const class_loader_;
-  const bool forward_strings_;
 };
 
 static std::unique_ptr<const DexFile> OpenOatDexFile(const OatFile* oat_file,
@@ -1864,10 +1845,8 @@
     }
     // Update class loader and resolved strings. If added_class_table is false, the resolved
     // strings were forwarded UpdateAppImageClassLoadersAndDexCaches.
-    UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
-                                                       class_loader.Get(),
-                                                       forward_dex_cache_arrays);
-    for (ClassTable::TableSlot& root : temp_set) {
+    UpdateClassLoaderVisitor visitor(space, class_loader.Get());
+    for (const ClassTable::TableSlot& root : temp_set) {
       visitor(root.Read());
     }
     // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
@@ -2524,8 +2503,9 @@
       // the Java-side could still succeed for racy programs if another thread is actively
       // modifying the class loader's path list.
 
-      if (Runtime::Current()->IsAotCompiler()) {
-        // Oops, compile-time, can't run actual class-loader code.
+      if (!self->CanCallIntoJava()) {
+        // Oops, we can't call into java so we can't run actual class-loader code.
+        // This is the case, e.g., for the compiler (JIT or AOT).
         ObjPtr<mirror::Throwable> pre_allocated =
             Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
         self->SetException(pre_allocated);
@@ -2667,6 +2647,8 @@
 
   ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
+  // Make sure we have a valid empty iftable even if there are errors.
+  klass->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
 
   // Add the newly loaded class to the loaded classes table.
   ObjPtr<mirror::Class> existing = InsertClass(descriptor, klass.Get(), hash);
@@ -3018,7 +3000,6 @@
 
   klass->SetDexClassDefIndex(dex_file.GetIndexForClassDef(dex_class_def));
   klass->SetDexTypeIndex(dex_class_def.class_idx_);
-  CHECK(klass->GetDexCacheStrings() != nullptr);
 }
 
 void ClassLinker::LoadClass(Thread* self,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 685677b..42108d8 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -278,8 +278,6 @@
     EXPECT_FALSE(klass->IsArrayClass());
     EXPECT_TRUE(klass->GetComponentType() == nullptr);
     EXPECT_TRUE(klass->IsInSamePackage(klass.Get()));
-    EXPECT_TRUE(klass->GetDexCacheStrings() != nullptr);
-    EXPECT_EQ(klass->GetDexCacheStrings(), klass->GetDexCache()->GetStrings());
     std::string temp2;
     EXPECT_TRUE(mirror::Class::IsInSamePackage(klass->GetDescriptor(&temp),
                                                klass->GetDescriptor(&temp2)));
@@ -590,7 +588,6 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, component_type_), "componentType");
     addOffset(OFFSETOF_MEMBER(mirror::Class, copied_methods_offset_), "copiedMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_), "dexCache");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ext_data_), "extData");
@@ -1175,6 +1172,24 @@
   EXPECT_TRUE(init->IsInitialized());
 }
 
+TEST_F(ClassLinkerTest, ErroneousClass) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadMultiDex("ErroneousA", "ErroneousB");
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
+  hs.Self()->AssertNoPendingException();
+  const char* descriptor = "LErroneous;";
+  ObjPtr<mirror::Class> klass = class_linker_->FindClass(soa.Self(), descriptor, class_loader);
+  // Erroneous since we are extending a final class.
+  hs.Self()->AssertPendingException();
+  EXPECT_TRUE(klass == nullptr);
+  klass = class_linker_->LookupClass(soa.Self(), descriptor, class_loader.Get());
+  EXPECT_FALSE(klass == nullptr);
+  EXPECT_TRUE(klass->IsErroneous());
+  EXPECT_TRUE(klass->GetIfTable() != nullptr);
+}
+
 TEST_F(ClassLinkerTest, FinalizableBit) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::Class* c;
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index f13ff8c..bebcd71 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -74,8 +74,6 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
 #define ART_METHOD_DECLARING_CLASS_OFFSET 0
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DECLARING_CLASS_OFFSET), (static_cast<int32_t>(art::ArtMethod:: DeclaringClassOffset().Int32Value())))
-#define DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET 40
-DEFINE_CHECK_EQ(static_cast<int32_t>(DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET), (static_cast<int32_t>(art::mirror::Class:: DexCacheStringsOffset().Int32Value())))
 #define STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT 3
 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), (static_cast<int32_t>(art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair)))))
 #define STRING_DEX_CACHE_SIZE_MINUS_ONE 1023
diff --git a/runtime/image.cc b/runtime/image.cc
index 52c9f4e..2ef60c3 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '3', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '4', '\0' };  // mirror::Class update
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 2e85064..423f054 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -256,7 +256,7 @@
   DCHECK_LT(string_idx.index_ % mirror::DexCache::kDexCacheStringCacheSize,
             declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string_ptr =
-      mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
+      mirror::StringDexCachePair::Lookup(declaring_class->GetDexCache()->GetStrings(),
                                          string_idx.index_,
                                          mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string_ptr == nullptr)) {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index dac2e60..b7125a8 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -291,7 +291,7 @@
   // is not null when we instrument.
   thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
   thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
-  thread_pool_->StartWorkers(Thread::Current());
+  Start();
 }
 
 void Jit::DeleteThreadPool() {
@@ -710,14 +710,23 @@
   }
 }
 
+void Jit::Stop() {
+  Thread* self = Thread::Current();
+  // TODO(ngeoffray): change API to not require calling WaitForCompilationToFinish twice.
+  WaitForCompilationToFinish(self);
+  GetThreadPool()->StopWorkers(self);
+  WaitForCompilationToFinish(self);
+}
+
+void Jit::Start() {
+  GetThreadPool()->StartWorkers(Thread::Current());
+}
+
 ScopedJitSuspend::ScopedJitSuspend() {
   jit::Jit* jit = Runtime::Current()->GetJit();
   was_on_ = (jit != nullptr) && (jit->GetThreadPool() != nullptr);
   if (was_on_) {
-    Thread* self = Thread::Current();
-    jit->WaitForCompilationToFinish(self);
-    jit->GetThreadPool()->StopWorkers(self);
-    jit->WaitForCompilationToFinish(self);
+    jit->Stop();
   }
 }
 
@@ -725,7 +734,7 @@
   if (was_on_) {
     DCHECK(Runtime::Current()->GetJit() != nullptr);
     DCHECK(Runtime::Current()->GetJit()->GetThreadPool() != nullptr);
-    Runtime::Current()->GetJit()->GetThreadPool()->StartWorkers(Thread::Current());
+    Runtime::Current()->GetJit()->Start();
   }
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a230c78..05c3905 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -179,6 +179,12 @@
     return thread_pool_.get();
   }
 
+  // Stop the JIT by waiting for all current compilations and enqueued compilations to finish.
+  void Stop();
+
+  // Start JIT threads.
+  void Start();
+
  private:
   Jit();
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 1b0ad83..f43e30d 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -217,6 +217,7 @@
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
                                   size_t code_size,
+                                  size_t data_size,
                                   bool osr,
                                   Handle<mirror::ObjectArray<mirror::Object>> roots,
                                   bool has_should_deoptimize_flag,
@@ -230,6 +231,7 @@
                                        fp_spill_mask,
                                        code,
                                        code_size,
+                                       data_size,
                                        osr,
                                        roots,
                                        has_should_deoptimize_flag,
@@ -246,6 +248,7 @@
                                 fp_spill_mask,
                                 code,
                                 code_size,
+                                data_size,
                                 osr,
                                 roots,
                                 has_should_deoptimize_flag,
@@ -513,6 +516,7 @@
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
                                           size_t code_size,
+                                          size_t data_size,
                                           bool osr,
                                           Handle<mirror::ObjectArray<mirror::Object>> roots,
                                           bool has_should_deoptimize_flag,
@@ -547,6 +551,11 @@
           core_spill_mask,
           fp_spill_mask,
           code_size);
+      DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
+      DCHECK_LE(roots_data, stack_map);
+      // Flush data cache, as compiled code references literals in it.
+      FlushDataCache(reinterpret_cast<char*>(roots_data),
+                     reinterpret_cast<char*>(roots_data + data_size));
       // Flush caches before we remove write permission because on some ARMv8 hardware,
       // flushing caches require write permissions.
       //
@@ -657,12 +666,12 @@
   FreeData(reinterpret_cast<uint8_t*>(roots_data));
 }
 
-void JitCodeCache::ReserveData(Thread* self,
-                               size_t stack_map_size,
-                               size_t number_of_roots,
-                               ArtMethod* method,
-                               uint8_t** stack_map_data,
-                               uint8_t** roots_data) {
+size_t JitCodeCache::ReserveData(Thread* self,
+                                 size_t stack_map_size,
+                                 size_t number_of_roots,
+                                 ArtMethod* method,
+                                 uint8_t** stack_map_data,
+                                 uint8_t** roots_data) {
   size_t table_size = ComputeRootTableSize(number_of_roots);
   size_t size = RoundUp(stack_map_size + table_size, sizeof(void*));
   uint8_t* result = nullptr;
@@ -695,9 +704,11 @@
     *roots_data = result;
     *stack_map_data = result + table_size;
     FillRootTableLength(*roots_data, number_of_roots);
+    return size;
   } else {
     *roots_data = nullptr;
     *stack_map_data = nullptr;
+    return 0;
   }
 }
 
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 30e2efb..d97742d 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -106,6 +106,7 @@
                       size_t fp_spill_mask,
                       const uint8_t* code,
                       size_t code_size,
+                      size_t data_size,
                       bool osr,
                       Handle<mirror::ObjectArray<mirror::Object>> roots,
                       bool has_should_deoptimize_flag,
@@ -121,12 +122,13 @@
 
   // Allocate a region of data that contain `size` bytes, and potentially space
   // for storing `number_of_roots` roots. Returns null if there is no more room.
-  void ReserveData(Thread* self,
-                   size_t size,
-                   size_t number_of_roots,
-                   ArtMethod* method,
-                   uint8_t** stack_map_data,
-                   uint8_t** roots_data)
+  // Return the number of bytes allocated.
+  size_t ReserveData(Thread* self,
+                     size_t size,
+                     size_t number_of_roots,
+                     ArtMethod* method,
+                     uint8_t** stack_map_data,
+                     uint8_t** roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -237,6 +239,7 @@
                               size_t fp_spill_mask,
                               const uint8_t* code,
                               size_t code_size,
+                              size_t data_size,
                               bool osr,
                               Handle<mirror::ObjectArray<mirror::Object>> roots,
                               bool has_should_deoptimize_flag,
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5fdf8f3..2fb8d28 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -937,14 +937,6 @@
   }
 }
 
-inline void Class::SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings) {
-  SetFieldPtr<false>(DexCacheStringsOffset(), new_dex_cache_strings);
-}
-
-inline StringDexCacheType* Class::GetDexCacheStrings() {
-  return GetFieldPtr64<StringDexCacheType*>(DexCacheStringsOffset());
-}
-
 template<ReadBarrierOption kReadBarrierOption, class Visitor>
 void Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
   for (ArtField& field : GetSFieldsUnchecked()) {
@@ -1095,12 +1087,6 @@
   if (methods != new_methods) {
     dest->SetMethodsPtrInternal(new_methods);
   }
-  // Update dex cache strings.
-  StringDexCacheType* strings = GetDexCacheStrings();
-  StringDexCacheType* new_strings = visitor(strings);
-  if (strings != new_strings) {
-    dest->SetDexCacheStrings(new_strings);
-  }
   // Fix up embedded tables.
   if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index c1565df..9964b73 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -191,7 +191,6 @@
 
 void Class::SetDexCache(ObjPtr<DexCache> new_dex_cache) {
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache);
-  SetDexCacheStrings(new_dex_cache != nullptr ? new_dex_cache->GetStrings() : nullptr);
 }
 
 void Class::SetClassSize(uint32_t new_class_size) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d7449c8..fb2792a 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1247,13 +1247,6 @@
   bool GetSlowPathEnabled() REQUIRES_SHARED(Locks::mutator_lock_);
   void SetSlowPath(bool enabled) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  StringDexCacheType* GetDexCacheStrings() REQUIRES_SHARED(Locks::mutator_lock_);
-  void SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-  static MemberOffset DexCacheStringsOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_strings_);
-  }
-
   // May cause thread suspension due to EqualParameters.
   ArtMethod* GetDeclaredConstructor(Thread* self,
                                     Handle<ObjectArray<Class>> args,
@@ -1438,9 +1431,6 @@
   // virtual_ methods_ for miranda methods.
   HeapReference<PointerArray> vtable_;
 
-  // Short cuts to dex_cache_ member for fast compiled code access.
-  uint64_t dex_cache_strings_;
-
   // instance fields
   //
   // These describe the layout of the contents of an Object.
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index e85286c..68815e7 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -396,19 +396,14 @@
   }
 }
 
-// Performs updates to class that will allow us to verify it.
-bool Redefiner::UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
-                            art::ObjPtr<art::mirror::DexCache> new_dex_cache) {
+bool Redefiner::UpdateMethods(art::ObjPtr<art::mirror::Class> mclass,
+                              art::ObjPtr<art::mirror::DexCache> new_dex_cache,
+                              const art::DexFile::ClassDef& class_def) {
   art::ClassLinker* linker = runtime_->GetClassLinker();
   art::PointerSize image_pointer_size = linker->GetImagePointerSize();
-  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
-      *dex_file_, class_sig_, art::ComputeModifiedUtf8Hash(class_sig_));
-  if (class_def == nullptr) {
-    RecordFailure(ERR(INVALID_CLASS_FORMAT), "Unable to find ClassDef!");
-    return false;
-  }
-  const art::DexFile::TypeId& declaring_class_id = dex_file_->GetTypeId(class_def->class_idx_);
+  const art::DexFile::TypeId& declaring_class_id = dex_file_->GetTypeId(class_def.class_idx_);
   const art::DexFile& old_dex_file = mclass->GetDexFile();
+  // Update methods.
   for (art::ArtMethod& method : mclass->GetMethods(image_pointer_size)) {
     const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(method.GetName());
     art::dex::TypeIndex method_return_idx =
@@ -435,15 +430,58 @@
     uint32_t dex_method_idx = dex_file_->GetIndexForMethodId(*method_id);
     method.SetDexMethodIndex(dex_method_idx);
     linker->SetEntryPointsToInterpreter(&method);
-    method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(*class_def, dex_method_idx));
+    method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(class_def, dex_method_idx));
     method.SetDexCacheResolvedMethods(new_dex_cache->GetResolvedMethods(), image_pointer_size);
     method.SetDexCacheResolvedTypes(new_dex_cache->GetResolvedTypes(), image_pointer_size);
   }
+  return true;
+}
+
+bool Redefiner::UpdateFields(art::ObjPtr<art::mirror::Class> mclass) {
+  // TODO The IFields & SFields pointers should be combined like the methods_ arrays were.
+  for (auto fields_iter : {mclass->GetIFields(), mclass->GetSFields()}) {
+    for (art::ArtField& field : fields_iter) {
+      std::string declaring_class_name;
+      const art::DexFile::TypeId* new_declaring_id =
+          dex_file_->FindTypeId(field.GetDeclaringClass()->GetDescriptor(&declaring_class_name));
+      const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(field.GetName());
+      const art::DexFile::TypeId* new_type_id = dex_file_->FindTypeId(field.GetTypeDescriptor());
+      // TODO Handle error, cleanup.
+      CHECK(new_name_id != nullptr && new_type_id != nullptr && new_declaring_id != nullptr);
+      const art::DexFile::FieldId* new_field_id =
+          dex_file_->FindFieldId(*new_declaring_id, *new_name_id, *new_type_id);
+      CHECK(new_field_id != nullptr);
+      // We only need to update the index since the other data in the ArtField cannot be updated.
+      field.SetDexFieldIndex(dex_file_->GetIndexForFieldId(*new_field_id));
+    }
+  }
+  return true;
+}
+
+// Performs updates to class that will allow us to verify it.
+bool Redefiner::UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                            art::ObjPtr<art::mirror::DexCache> new_dex_cache) {
+  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
+      *dex_file_, class_sig_, art::ComputeModifiedUtf8Hash(class_sig_));
+  if (class_def == nullptr) {
+    RecordFailure(ERR(INVALID_CLASS_FORMAT), "Unable to find ClassDef!");
+    return false;
+  }
+  if (!UpdateMethods(mclass, new_dex_cache, *class_def)) {
+    // TODO Investigate appropriate error types.
+    RecordFailure(ERR(INTERNAL), "Unable to update class methods.");
+    return false;
+  }
+  if (!UpdateFields(mclass)) {
+    // TODO Investigate appropriate error types.
+    RecordFailure(ERR(INTERNAL), "Unable to update class fields.");
+    return false;
+  }
+
   // Update the class fields.
   // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
   // to call GetReturnTypeDescriptor and GetParameterTypeList above).
   mclass->SetDexCache(new_dex_cache.Ptr());
-  mclass->SetDexCacheStrings(new_dex_cache->GetStrings());
   mclass->SetDexClassDefIndex(dex_file_->GetIndexForClassDef(*class_def));
   mclass->SetDexTypeIndex(dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(class_sig_)));
   return true;
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
index c819acd..73cfc2b 100644
--- a/runtime/openjdkjvmti/ti_redefine.h
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -159,6 +159,14 @@
                           art::ObjPtr<art::mirror::LongArray> original_cookie)
       REQUIRES(art::Locks::mutator_lock_);
 
+  bool UpdateFields(art::ObjPtr<art::mirror::Class> mclass)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  bool UpdateMethods(art::ObjPtr<art::mirror::Class> mclass,
+                     art::ObjPtr<art::mirror::DexCache> new_dex_cache,
+                     const art::DexFile::ClassDef& class_def)
+      REQUIRES(art::Locks::mutator_lock_);
+
   bool UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
                    art::ObjPtr<art::mirror::DexCache> new_dex_cache)
       REQUIRES(art::Locks::mutator_lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0977093..2086d70 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1199,6 +1199,8 @@
   CHECK_EQ(self->GetThreadId(), ThreadList::kMainThreadId);
   CHECK(self != nullptr);
 
+  self->SetCanCallIntoJava(!IsAotCompiler());
+
   // Set us to runnable so tools using a runtime can allocate and GC by default
   self->TransitionFromSuspendedToRunnable();
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9c93a5f..aff12ff 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1807,7 +1807,11 @@
   }
 }
 
-Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupted_(false) {
+Thread::Thread(bool daemon)
+    : tls32_(daemon),
+      wait_monitor_(nullptr),
+      interrupted_(false),
+      can_call_into_java_(true) {
   wait_mutex_ = new Mutex("a thread wait mutex");
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
diff --git a/runtime/thread.h b/runtime/thread.h
index 31cd0eb..411d85f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -881,6 +881,15 @@
     --tls32_.disable_thread_flip_count;
   }
 
+  // Returns true if the thread is allowed to call into java.
+  bool CanCallIntoJava() const {
+    return can_call_into_java_;
+  }
+
+  void SetCanCallIntoJava(bool can_call_into_java) {
+    can_call_into_java_ = can_call_into_java;
+  }
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -1588,6 +1597,10 @@
   // Pending extra checkpoints if checkpoint_function_ is already used.
   std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
 
+  // True if the thread is allowed to call back into java (e.g. during class resolution).
+  // By default this is true.
+  bool can_call_into_java_;
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 6abdca1..d9179c3 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -90,6 +90,8 @@
   Runtime* runtime = Runtime::Current();
   CHECK(runtime->AttachCurrentThread(worker->name_.c_str(), true, nullptr, false));
   worker->thread_ = Thread::Current();
+  // Thread pool workers cannot call into java.
+  worker->thread_->SetCanCallIntoJava(false);
   // Do work until its time to shut down.
   worker->Run();
   runtime->DetachCurrentThread();
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index eaadfe0..7ecfcd1 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -80,6 +80,7 @@
   DISALLOW_COPY_AND_ASSIGN(ThreadPoolWorker);
 };
 
+// Note that thread pool workers call Thread::SetCanCallIntoJava(false) on their own thread.
 class ThreadPool {
  public:
   // Returns the number of threads in the thread pool.
diff --git a/runtime/utils.h b/runtime/utils.h
index 04e0dde..16ef706 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -284,6 +284,12 @@
   __builtin___clear_cache(begin, end);
 }
 
+inline void FlushDataCache(char* begin, char* end) {
+  // Same as FlushInstructionCache for lack of other builtin. __builtin___clear_cache
+  // flushes both caches.
+  __builtin___clear_cache(begin, end);
+}
+
 template <typename T>
 constexpr PointerSize ConvertToPointerSize(T any) {
   if (any == 4 || any == 8) {
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 0a03ecb..a1c5fa8 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -20,5 +20,6 @@
 JNI_OnLoad called
 class null false test
 JNI_OnUnload called
+JNI_OnLoad called
 Number of loaded unload-ex maps 0
 Too small false
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index bbbb0a6..9b7e171 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -32,5 +32,19 @@
   }
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_stopJit(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->Stop();
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_startJit(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->Start();
+  }
+}
+
 }  // namespace
 }  // namespace art
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 2a6e944..595c70d 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -55,11 +55,15 @@
     }
 
     private static void testOatFilesUnloaded(int pid) throws Exception {
+        System.loadLibrary(nativeLibraryName);
+        // Stop the JIT to ensure its threads and work queue are not keeping classes
+        // artificially alive.
+        stopJit();
+        Runtime.getRuntime().gc();
+        System.runFinalization();
         BufferedReader reader = new BufferedReader(new FileReader ("/proc/" + pid + "/maps"));
         String line;
         int count = 0;
-        Runtime.getRuntime().gc();
-        System.runFinalization();
         while ((line = reader.readLine()) != null) {
             if (line.contains("@141-class-unload-ex.jar")) {
                 System.out.println(line);
@@ -67,6 +71,7 @@
             }
         }
         System.out.println("Number of loaded unload-ex maps " + count);
+        startJit();
     }
 
     private static void stressTest(Constructor<?> constructor) throws Exception {
@@ -229,4 +234,7 @@
     private static int getPid() throws Exception {
       return Integer.parseInt(new File("/proc/self").getCanonicalFile().getName());
     }
+
+    public static native void stopJit();
+    public static native void startJit();
 }
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
index 209786a..dfc4a5f 100644
--- a/test/530-checker-loops3/src/Main.java
+++ b/test/530-checker-loops3/src/Main.java
@@ -235,6 +235,59 @@
     }
   }
 
+  /// CHECK-START: void Main.shortBound1(int[], short) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.shortBound1(int[], short) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shortBound1(int[], short) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shortBound1(int[] array, short s) {
+    // Lower precision bound will appear in deopt arithmetic
+    // and follows normal implicit widening conversion.
+    for (int i = 0; i < s; i++) {
+      array[i] = 222;
+    }
+  }
+
+  /// CHECK-START: void Main.shortBound2(int[], short) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.shortBound2(int[], short) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shortBound2(int[], short) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shortBound2(int[] array, short s) {
+    // Lower precision bound will appear in deopt arithmetic
+    // and follows normal implicit widening conversion.
+    for (int i = 0; s > i; i++) {
+      array[i] = 444;
+    }
+  }
+
+  /// CHECK-START: void Main.narrowingFromLong(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.narrowingFromLong(int[], int) BCE (after)
+  /// CHECK-DAG: BoundsCheck loop:{{B\d+}}
+  public static void narrowingFromLong(int[] array, int n) {
+    // Parallel induction in long precision that is narrowed provides type
+    // conversion challenges for BCE in deopt arithmetic when combined
+    // with the int loop induction. Therefore, currently skipped.
+    long l = 0;
+    for (int i = 0; i < n; i++, l++) {
+      array[(int)l] = 888;
+    }
+  }
+
   //
   // Verifier.
   //
@@ -316,6 +369,38 @@
     } catch (ArrayIndexOutOfBoundsException e) {
     }
 
+    shortBound1(a, (short)a.length);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(222, a[i]);
+    }
+    shortBound2(a, (short)a.length);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(444, a[i]);
+    }
+
+    try {
+      shortBound1(a, (short)(a.length + 1));
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(222, a[i]);
+    }
+
+    try {
+      shortBound2(a, (short)(a.length + 1));
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(444, a[i]);
+    }
+
+    narrowingFromLong(a, a.length);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(888, a[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index ecc129a..ad3ff44 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -465,6 +465,20 @@
     return i;
   }
 
+  // TODO: handle as closed/empty eventually?
+  static int mainIndexShort1(short s) {
+    int i = 0;
+    for (i = 0; i < s; i++) { }
+    return i;
+  }
+
+  // TODO: handle as closed/empty eventually?
+  static int mainIndexShort2(short s) {
+    int i = 0;
+    for (i = 0; s > i; i++) { }
+    return i;
+  }
+
   /// CHECK-START: int Main.periodicReturnedN(int) loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
@@ -693,6 +707,75 @@
     return x;
   }
 
+  /// CHECK-START: float Main.periodicFloat10() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi3:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi4:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: float Main.periodicFloat10() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  //
+  /// CHECK-START: float Main.periodicFloat10() loop_optimization (after)
+  /// CHECK-DAG: <<Float:f\d+>>  FloatConstant 2    loop:none
+  /// CHECK-DAG:                 Return [<<Float>>] loop:none
+  private static float periodicFloat10() {
+    float r = 4.5f;
+    float s = 2.0f;
+    float t = -1.0f;
+    for (int i = 0; i < 10; i++) {
+      float tmp = t; t = r; r = s; s = tmp;
+    }
+    return r;
+  }
+
+  /// CHECK-START: float Main.periodicFloat11() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi3:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi4:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: float Main.periodicFloat11() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  //
+  /// CHECK-START: float Main.periodicFloat11() loop_optimization (after)
+  /// CHECK-DAG: <<Float:f\d+>>  FloatConstant -1   loop:none
+  /// CHECK-DAG:                 Return [<<Float>>] loop:none
+  private static float periodicFloat11() {
+    float r = 4.5f;
+    float s = 2.0f;
+    float t = -1.0f;
+    for (int i = 0; i < 11; i++) {
+      float tmp = t; t = r; r = s; s = tmp;
+    }
+    return r;
+  }
+
+  /// CHECK-START: float Main.periodicFloat12() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi3:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Phi4:f\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: float Main.periodicFloat12() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  //
+  /// CHECK-START: float Main.periodicFloat12() loop_optimization (after)
+  /// CHECK-DAG: <<Float:f\d+>>  FloatConstant 4.5  loop:none
+  /// CHECK-DAG:                 Return [<<Float>>] loop:none
+  private static float periodicFloat12() {
+    float r = 4.5f;
+    float s = 2.0f;
+    float t = -1.0f;
+    for (int i = 0; i < 12; i++) {
+      float tmp = t; t = r; r = s; s = tmp;
+    }
+    return r;
+  }
+
   private static int exceptionExitBeforeAdd() {
     int k = 0;
     try {
@@ -779,6 +862,8 @@
     for (int n = -4; n < 4; n++) {
       int tc = (n <= 0) ? 0 : n;
       expectEquals(tc, mainIndexReturnedN(n));
+      expectEquals(tc, mainIndexShort1((short) n));
+      expectEquals(tc, mainIndexShort2((short) n));
       expectEquals(tc & 1, periodicReturnedN(n));
       expectEquals((tc * (tc + 1)) / 2, getSumN(n));
     }
@@ -803,6 +888,10 @@
       expectEquals(!even, periodicBoolIdiom3N(false, n));
     }
 
+    expectEquals( 2.0f, periodicFloat10());
+    expectEquals(-1.0f, periodicFloat11());
+    expectEquals( 4.5f, periodicFloat12());
+
     expectEquals(100, exceptionExitBeforeAdd());
     expectEquals(100, exceptionExitAfterAdd());
     a = null;
@@ -815,6 +904,12 @@
     System.out.println("passed");
   }
 
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   private static void expectEquals(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index e5fa53f..7522a65 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -5,12 +5,12 @@
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
-1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
 1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
 1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
 3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
@@ -22,12 +22,12 @@
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
-1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
 1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
 1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
 3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
@@ -46,12 +46,12 @@
 root@root --(thread)--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
-1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
 1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
 1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
 3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
@@ -63,12 +63,12 @@
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
-1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
 1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
 1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
 3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 9c4fa8d..49ab7dd 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -183,7 +183,7 @@
       if (*tag_ptr >= 1000) {
         // This is a class or interface, the size of which will be dependent on the architecture.
         // Do not print the size, but detect known values and "normalize" for the golden file.
-        if ((sizeof(void*) == 4 && size == 180) || (sizeof(void*) == 8 && size == 232)) {
+        if ((sizeof(void*) == 4 && size == 172) || (sizeof(void*) == 8 && size == 224)) {
           adapted_size = 123;
         }
       }
diff --git a/test/Android.arm_vixl.mk b/test/917-fields-transformation/build
old mode 100644
new mode 100755
similarity index 71%
rename from test/Android.arm_vixl.mk
rename to test/917-fields-transformation/build
index c89eb4a..898e2e5
--- a/test/Android.arm_vixl.mk
+++ b/test/917-fields-transformation/build
@@ -1,5 +1,6 @@
+#!/bin/bash
 #
-# Copyright (C) 2016 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,9 +13,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
-# Known broken tests for the ARM VIXL backend.
-TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \
-  562-checker-no-intermediate \
-  624-checker-stringops \
+./default-build "$@" --experimental agents
diff --git a/test/917-fields-transformation/expected.txt b/test/917-fields-transformation/expected.txt
new file mode 100644
index 0000000..bcdd201
--- /dev/null
+++ b/test/917-fields-transformation/expected.txt
@@ -0,0 +1,12 @@
+Result is Hello
+take1 is Hello
+take2 is Goodbye
+Result is start
+take1 is start
+take2 is end
+Result is Goodbye
+take1 is Hello
+take2 is Goodbye
+Result is end
+take1 is start
+take2 is end
diff --git a/test/917-fields-transformation/info.txt b/test/917-fields-transformation/info.txt
new file mode 100644
index 0000000..4cd1bd9
--- /dev/null
+++ b/test/917-fields-transformation/info.txt
@@ -0,0 +1 @@
+Tests field access after class redefinition support in the jvmti plugin.
diff --git a/test/917-fields-transformation/run b/test/917-fields-transformation/run
new file mode 100755
index 0000000..a434b63
--- /dev/null
+++ b/test/917-fields-transformation/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+  if [[ "$@" != *"--debuggable"* ]]; then
+    other_args=" -Xcompiler-option --debuggable "
+  else
+    other_args=""
+  fi
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=917-fields-transformation,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   --android-runtime-option -Xfully-deoptable \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/917-fields-transformation/src/Main.java b/test/917-fields-transformation/src/Main.java
new file mode 100644
index 0000000..5378bb7
--- /dev/null
+++ b/test/917-fields-transformation/src/Main.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+public class Main {
+
+  // base64 encoded class/dex file for
+  // class Transform {
+  //   public String take1;
+  //   public String take2;
+  //
+  //   public Transform(String a, String b) {
+  //     take1 = a;
+  //     take2 = b;
+  //   }
+  //
+  //   public String getResult() {
+  //     return take2;
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAFwoABQARCQAEABIJAAQAEwcAFAcAFQEABXRha2UxAQASTGphdmEvbGFuZy9TdHJp" +
+    "bmc7AQAFdGFrZTIBAAY8aW5pdD4BACcoTGphdmEvbGFuZy9TdHJpbmc7TGphdmEvbGFuZy9TdHJp" +
+    "bmc7KVYBAARDb2RlAQAPTGluZU51bWJlclRhYmxlAQAJZ2V0UmVzdWx0AQAUKClMamF2YS9sYW5n" +
+    "L1N0cmluZzsBAApTb3VyY2VGaWxlAQAOVHJhbnNmb3JtLmphdmEMAAkAFgwABgAHDAAIAAcBAAlU" +
+    "cmFuc2Zvcm0BABBqYXZhL2xhbmcvT2JqZWN0AQADKClWACAABAAFAAAAAgABAAYABwAAAAEACAAH" +
+    "AAAAAgABAAkACgABAAsAAAAzAAIAAwAAAA8qtwABKiu1AAIqLLUAA7EAAAABAAwAAAASAAQAAAAU" +
+    "AAQAFQAJABYADgAXAAEADQAOAAEACwAAAB0AAQABAAAABSq0AAOwAAAAAQAMAAAABgABAAAAGgAB" +
+    "AA8AAAACABA=");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAGUTBb4jIABRlaI9rejdk7RCfyqR2kmNSkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAM" +
+    "AAAAcAAAAAQAAACgAAAAAwAAALAAAAACAAAA1AAAAAMAAADkAAAAAQAAAPwAAACIAQAAHAEAAFwB" +
+    "AABkAQAAZwEAAHQBAACIAQAAnAEAAKwBAACvAQAAtAEAAMgBAADTAQAA2gEAAAIAAAADAAAABAAA" +
+    "AAYAAAABAAAAAgAAAAAAAAAGAAAAAwAAAAAAAAAHAAAAAwAAAFQBAAAAAAIACgAAAAAAAgALAAAA" +
+    "AAACAAAAAAAAAAAACQAAAAEAAQAAAAAAAAAAAAAAAAABAAAAAAAAAAUAAAAAAAAA8AEAAAAAAAAD" +
+    "AAMAAQAAAOEBAAAIAAAAcBACAAAAWwEAAFsCAQAOAAIAAQAAAAAA6wEAAAMAAABUEAEAEQAAAAIA" +
+    "AAACAAIABjxpbml0PgABTAALTFRyYW5zZm9ybTsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEv" +
+    "bGFuZy9TdHJpbmc7AA5UcmFuc2Zvcm0uamF2YQABVgADVkxMABJlbWl0dGVyOiBqYWNrLTQuMTkA" +
+    "CWdldFJlc3VsdAAFdGFrZTEABXRha2UyABQCAAAHDjwtLQAaAAcOAAACAQEAAQEBAIGABJwCAQG8" +
+    "AgAADQAAAAAAAAABAAAAAAAAAAEAAAAMAAAAcAAAAAIAAAAEAAAAoAAAAAMAAAADAAAAsAAAAAQA" +
+    "AAACAAAA1AAAAAUAAAADAAAA5AAAAAYAAAABAAAA/AAAAAEgAAACAAAAHAEAAAEQAAABAAAAVAEA" +
+    "AAIgAAAMAAAAXAEAAAMgAAACAAAA4QEAAAAgAAABAAAA8AEAAAAQAAABAAAABAIAAA==");
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[1]);
+    doTest(new Transform("Hello", "Goodbye"),
+           new Transform("start", "end"));
+  }
+
+  private static void printTransform(Transform t) {
+    System.out.println("Result is " + t.getResult());
+    System.out.println("take1 is " + t.take1);
+    System.out.println("take2 is " + t.take2);
+  }
+  public static void doTest(Transform t1, Transform t2) {
+    printTransform(t1);
+    printTransform(t2);
+    doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
+    printTransform(t1);
+    printTransform(t2);
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
+}
diff --git a/test/917-fields-transformation/src/Transform.java b/test/917-fields-transformation/src/Transform.java
new file mode 100644
index 0000000..6fe6223
--- /dev/null
+++ b/test/917-fields-transformation/src/Transform.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Transform {
+  public String take1;
+  public String take2;
+
+  public Transform(String take1, String take2) {
+    this.take1 = take1;
+    this.take2 = take2;
+  }
+
+  public String getResult() {
+    return take1;
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c02999b..543ac04 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -226,6 +226,10 @@
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
 endef  # name-to-var
 
+# Disable 153-reference-stress temporarily until a fix arrives. b/33389022.
+ART_TEST_RUN_TEST_SKIP += \
+  153-reference-stress
+
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
@@ -280,6 +284,7 @@
   911-get-stack-trace \
   912-classes \
   913-heaps \
+  917-fields-transformation \
 
 ifneq (,$(filter target,$(TARGET_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -564,26 +569,6 @@
       $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
 endif
 
-# Known broken tests for the ARM VIXL backend.
-# Android.arm_vixl.mk defines TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS.
-include $(LOCAL_PATH)/Android.arm_vixl.mk
-
-ifdef ART_USE_VIXL_ARM_BACKEND
-  ifeq (arm,$(filter arm,$(TARGET_ARCH) $(TARGET_2ND_ARCH)))
-    ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-          $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-          $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-          $(TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS),32)
-    endif
-  endif
-  # TODO(VIXL): These two tests currently fail, but adding them to `ART_TEST_KNOWN_BROKEN` breaks
-  # `export ART_USE_VIXL_ARM_BACKEND=true && mma -j6 test-art-target-gtest dist`
-  #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-dex2oat_test32
-  #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-image_test32
-endif
-
-
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
 
diff --git a/test/ErroneousA/ErroneousA.java b/test/ErroneousA/ErroneousA.java
new file mode 100644
index 0000000..49da544
--- /dev/null
+++ b/test/ErroneousA/ErroneousA.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class FinalSuper {}
diff --git a/test/ErroneousB/ErroneousB.java b/test/ErroneousB/ErroneousB.java
new file mode 100644
index 0000000..6c2902a
--- /dev/null
+++ b/test/ErroneousB/ErroneousB.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Only final in first dex.
+class FinalSuper {}
+
+class Erroneous extends FinalSuper {}
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 2795cbc..3886148 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -66,6 +66,7 @@
   { "911-get-stack-trace", Test911GetStackTrace::OnLoad, nullptr },
   { "912-classes", Test912Classes::OnLoad, nullptr },
   { "913-heaps", Test913Heaps::OnLoad, nullptr },
+  { "917-fields-transformation", common_redefine::OnLoad, nullptr },
 };
 
 static AgentLib* FindAgent(char* name) {
diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def
index 4b9d481..abb5e1e 100644
--- a/tools/cpp-define-generator/offset_dexcache.def
+++ b/tools/cpp-define-generator/offset_dexcache.def
@@ -38,7 +38,6 @@
 DEFINE_ART_METHOD_OFFSET_SIZED(JNI,                  EntryPointFromJni)
 DEFINE_ART_METHOD_OFFSET_SIZED(QUICK_CODE,           EntryPointFromQuickCompiledCode)
 DEFINE_ART_METHOD_OFFSET(DECLARING_CLASS,            DeclaringClass)
-DEFINE_DECLARING_CLASS_OFFSET(DEX_CACHE_STRINGS,     DexCacheStrings)
 
 #undef DEFINE_ART_METHOD_OFFSET
 #undef DEFINE_ART_METHOD_OFFSET_32