Merge "Make SafeDexInstructionIterator more robust"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 547ffbc..135f9c7 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -66,6 +66,7 @@
 #include "nativehelper/ScopedLocalRef.h"
 #include "object_lock.h"
 #include "runtime.h"
+#include "runtime_intrinsics.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -365,28 +366,6 @@
 }
 #undef CREATE_TRAMPOLINE
 
-static void SetupIntrinsic(Thread* self,
-                           Intrinsics intrinsic,
-                           InvokeType invoke_type,
-                           const char* class_name,
-                           const char* method_name,
-                           const char* signature)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  PointerSize image_size = class_linker->GetImagePointerSize();
-  ObjPtr<mirror::Class> cls = class_linker->FindSystemClass(self, class_name);
-  if (cls == nullptr) {
-    LOG(FATAL) << "Could not find class of intrinsic " << class_name;
-  }
-  ArtMethod* method = cls->FindClassMethod(method_name, signature, image_size);
-  if (method == nullptr || method->GetDeclaringClass() != cls) {
-    LOG(FATAL) << "Could not find method of intrinsic "
-               << class_name << " " << method_name << " " << signature;
-  }
-  DCHECK_EQ(method->GetInvokeType(), invoke_type);
-  method->SetIntrinsic(static_cast<uint32_t>(intrinsic));
-}
-
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
@@ -405,14 +384,7 @@
     // We don't need to setup the intrinsics for non boot image compilation, as
     // those compilations will pick up a boot image that have the ArtMethod already
     // set with the intrinsics flag.
-    ScopedObjectAccess soa(Thread::Current());
-#define SETUP_INTRINSICS(Name, InvokeType, NeedsEnvironmentOrCache, SideEffects, Exceptions, \
-                         ClassName, MethodName, Signature) \
-  SetupIntrinsic(soa.Self(), Intrinsics::k##Name, InvokeType, ClassName, MethodName, Signature);
-#include "intrinsics_list.h"
-    INTRINSICS_LIST(SETUP_INTRINSICS)
-#undef INTRINSICS_LIST
-#undef SETUP_INTRINSICS
+    InitializeIntrinsics();
   }
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9d0b5c8..b8d1f52 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -536,6 +536,7 @@
       break;
     case kVirtual:
     case kInterface:
+    case kPolymorphic:
       LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
       UNREACHABLE();
   }
@@ -563,6 +564,9 @@
     case kInterface:
       entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck;
       break;
+    case kPolymorphic:
+      LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
+      UNREACHABLE();
   }
   InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
 }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 3851877..f7fd910 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -527,6 +527,10 @@
     StartAttributeStream("packed_type") << vec_operation->GetPackedType();
   }
 
+  void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) OVERRIDE {
+    StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString();
+  }
+
   void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
     VisitVecBinaryOperation(hadd);
     StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 4429e6e..bdeb261 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -256,30 +256,63 @@
   LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()      \
              << " should have been converted to HIR";                    \
 }
-#define UNREACHABLE_INTRINSICS(Arch)                  \
-UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)      \
-UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits)   \
-UNREACHABLE_INTRINSIC(Arch, FloatIsNaN)               \
-UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN)              \
-UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft)        \
-UNREACHABLE_INTRINSIC(Arch, LongRotateLeft)           \
-UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight)       \
-UNREACHABLE_INTRINSIC(Arch, LongRotateRight)          \
-UNREACHABLE_INTRINSIC(Arch, IntegerCompare)           \
-UNREACHABLE_INTRINSIC(Arch, LongCompare)              \
-UNREACHABLE_INTRINSIC(Arch, IntegerSignum)            \
-UNREACHABLE_INTRINSIC(Arch, LongSignum)               \
-UNREACHABLE_INTRINSIC(Arch, StringCharAt)             \
-UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)            \
-UNREACHABLE_INTRINSIC(Arch, StringLength)             \
-UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)          \
-UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)         \
-UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)          \
-UNREACHABLE_INTRINSIC(Arch, VarHandleFullFence)       \
-UNREACHABLE_INTRINSIC(Arch, VarHandleAcquireFence)    \
-UNREACHABLE_INTRINSIC(Arch, VarHandleReleaseFence)    \
-UNREACHABLE_INTRINSIC(Arch, VarHandleLoadLoadFence)   \
-UNREACHABLE_INTRINSIC(Arch, VarHandleStoreStoreFence)
+#define UNREACHABLE_INTRINSICS(Arch)                            \
+UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)                \
+UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits)             \
+UNREACHABLE_INTRINSIC(Arch, FloatIsNaN)                         \
+UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN)                        \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft)                  \
+UNREACHABLE_INTRINSIC(Arch, LongRotateLeft)                     \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight)                 \
+UNREACHABLE_INTRINSIC(Arch, LongRotateRight)                    \
+UNREACHABLE_INTRINSIC(Arch, IntegerCompare)                     \
+UNREACHABLE_INTRINSIC(Arch, LongCompare)                        \
+UNREACHABLE_INTRINSIC(Arch, IntegerSignum)                      \
+UNREACHABLE_INTRINSIC(Arch, LongSignum)                         \
+UNREACHABLE_INTRINSIC(Arch, StringCharAt)                       \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)                      \
+UNREACHABLE_INTRINSIC(Arch, StringLength)                       \
+UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)                    \
+UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)                   \
+UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)                    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleFullFence)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleAcquireFence)              \
+UNREACHABLE_INTRINSIC(Arch, VarHandleReleaseFence)              \
+UNREACHABLE_INTRINSIC(Arch, VarHandleLoadLoadFence)             \
+UNREACHABLE_INTRINSIC(Arch, VarHandleStoreStoreFence)           \
+UNREACHABLE_INTRINSIC(Arch, MethodHandleInvokeExact)            \
+UNREACHABLE_INTRINSIC(Arch, MethodHandleInvoke)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchange)        \
+UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchangeAcquire) \
+UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchangeRelease) \
+UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndSet)             \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGet)                       \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAcquire)                \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAdd)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAddAcquire)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAddRelease)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAnd)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAndAcquire)   \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAndRelease)   \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOr)           \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOrAcquire)    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOrRelease)    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXor)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXorAcquire)   \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXorRelease)   \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSet)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSetAcquire)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSetRelease)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetOpaque)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetVolatile)               \
+UNREACHABLE_INTRINSIC(Arch, VarHandleSet)                       \
+UNREACHABLE_INTRINSIC(Arch, VarHandleSetOpaque)                 \
+UNREACHABLE_INTRINSIC(Arch, VarHandleSetRelease)                \
+UNREACHABLE_INTRINSIC(Arch, VarHandleSetVolatile)               \
+UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSet)         \
+UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetAcquire)  \
+UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetPlain)    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetRelease)
 
 template <typename IntrinsicLocationsBuilder, typename Codegenerator>
 bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) {
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8f84796..74de077 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -25,6 +25,8 @@
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "driver/compiler_driver.h"
 #include "linear_order.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 
@@ -71,12 +73,25 @@
 // Enables vectorization (SIMDization) in the loop optimizer.
 static constexpr bool kEnableVectorization = true;
 
-// All current SIMD targets want 16-byte alignment.
-static constexpr size_t kAlignedBase = 16;
-
 // No loop unrolling factor (just one copy of the loop-body).
 static constexpr uint32_t kNoUnrollingFactor = 1;
 
+//
+// Static helpers.
+//
+
+// Base alignment (in bytes) for arrays/strings guaranteed by the Android runtime.
+static uint32_t BaseAlignment() {
+  return kObjectAlignment;
+}
+
+// Hidden offset (in bytes) for arrays/strings guaranteed by the Android runtime.
+static uint32_t HiddenOffset(DataType::Type type, bool is_string_char_at) {
+  return is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(DataType::Size(type)).Uint32Value();
+}
+
 // Remove the instruction from the graph. A bit more elaborate than the usual
 // instruction removal, since there may be a cycle in the use structure.
 static void RemoveFromCycle(HInstruction* instruction) {
@@ -288,7 +303,7 @@
 }
 
 // Compute relative vector length based on type difference.
-static size_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type, size_t vl) {
+static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type, uint32_t vl) {
   DCHECK(DataType::IsIntegralType(other_type));
   DCHECK(DataType::IsIntegralType(vector_type));
   DCHECK_GE(DataType::SizeShift(other_type), DataType::SizeShift(vector_type));
@@ -395,7 +410,7 @@
   } else if (reduction->IsVecMax()) {
     return HVecReduce::kMax;
   }
-  LOG(FATAL) << "Unsupported SIMD reduction";
+  LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId();
   UNREACHABLE();
 }
 
@@ -446,7 +461,8 @@
       simplified_(false),
       vector_length_(0),
       vector_refs_(nullptr),
-      vector_peeling_candidate_(nullptr),
+      vector_static_peeling_factor_(0),
+      vector_dynamic_peeling_candidate_(nullptr),
       vector_runtime_test_a_(nullptr),
       vector_runtime_test_b_(nullptr),
       vector_map_(nullptr),
@@ -746,7 +762,8 @@
   // Reset vector bookkeeping.
   vector_length_ = 0;
   vector_refs_->clear();
-  vector_peeling_candidate_ = nullptr;
+  vector_static_peeling_factor_ = 0;
+  vector_dynamic_peeling_candidate_ = nullptr;
   vector_runtime_test_a_ =
   vector_runtime_test_b_= nullptr;
 
@@ -763,10 +780,17 @@
     }
   }
 
-  // Does vectorization seem profitable?
-  if (!IsVectorizationProfitable(trip_count)) {
-    return false;
-  }
+  // Prepare alignment analysis:
+  // (1) find desired alignment (SIMD vector size in bytes).
+  // (2) initialize static loop peeling votes (peeling factor that will
+  //     make one particular reference aligned), never to exceed (1).
+  // (3) variable to record how many references share same alignment.
+  // (4) variable to record suitable candidate for dynamic loop peeling.
+  uint32_t desired_alignment = GetVectorSizeInBytes();
+  DCHECK_LE(desired_alignment, 16u);
+  uint32_t peeling_votes[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  uint32_t max_num_same_alignment = 0;
+  const ArrayReference* peeling_candidate = nullptr;
 
   // Data dependence analysis. Find each pair of references with same type, where
   // at least one is a write. Each such pair denotes a possible data dependence.
@@ -774,9 +798,10 @@
   // aliased, as well as the property that references either point to the same
   // array or to two completely disjoint arrays, i.e., no partial aliasing.
   // Other than a few simply heuristics, no detailed subscript analysis is done.
-  // The scan over references also finds a suitable dynamic loop peeling candidate.
-  const ArrayReference* candidate = nullptr;
+  // The scan over references also prepares finding a suitable alignment strategy.
   for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
+    uint32_t num_same_alignment = 0;
+    // Scan over all next references.
     for (auto j = i; ++j != vector_refs_->end(); ) {
       if (i->type == j->type && (i->lhs || j->lhs)) {
         // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
@@ -790,6 +815,10 @@
           if (x != y) {
             return false;
           }
+          // Count the number of references that have the same alignment (since
+          // base and offset are the same) and where at least one is a write, so
+          // e.g. a[i] = a[i] + b[i] counts a[i] but not b[i].
+          num_same_alignment++;
         } else {
           // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
           // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
@@ -808,10 +837,38 @@
         }
       }
     }
-  }
+    // Update information for finding suitable alignment strategy:
+    // (1) update votes for static loop peeling,
+    // (2) update suitable candidate for dynamic loop peeling.
+    Alignment alignment = ComputeAlignment(i->offset, i->type, i->is_string_char_at);
+    if (alignment.Base() >= desired_alignment) {
+      // If the array/string object has a known, sufficient alignment, use the
+      // initial offset to compute the static loop peeling vote (this always
+      // works, since elements have natural alignment).
+      uint32_t offset = alignment.Offset() & (desired_alignment - 1u);
+      uint32_t vote = (offset == 0)
+          ? 0
+          : ((desired_alignment - offset) >> DataType::SizeShift(i->type));
+      DCHECK_LT(vote, 16u);
+      ++peeling_votes[vote];
+    } else if (BaseAlignment() >= desired_alignment &&
+               num_same_alignment > max_num_same_alignment) {
+      // Otherwise, if the array/string object has a known, sufficient alignment
+      // for just the base but with an unknown offset, record the candidate with
+      // the most occurrences for dynamic loop peeling (again, the peeling always
+      // works, since elements have natural alignment).
+      max_num_same_alignment = num_same_alignment;
+      peeling_candidate = &(*i);
+    }
+  }  // for i
 
-  // Consider dynamic loop peeling for alignment.
-  SetPeelingCandidate(candidate, trip_count);
+  // Find a suitable alignment strategy.
+  SetAlignmentStrategy(peeling_votes, peeling_candidate);
+
+  // Does vectorization seem profitable?
+  if (!IsVectorizationProfitable(trip_count)) {
+    return false;
+  }
 
   // Success!
   return true;
@@ -828,9 +885,12 @@
   uint32_t unroll = GetUnrollingFactor(block, trip_count);
   uint32_t chunk = vector_length_ * unroll;
 
+  DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk));
+
   // A cleanup loop is needed, at least, for any unknown trip count or
   // for a known trip count with remainder iterations after vectorization.
-  bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;
+  bool needs_cleanup = trip_count == 0 ||
+      ((trip_count - vector_static_peeling_factor_) % chunk) != 0;
 
   // Adjust vector bookkeeping.
   HPhi* main_phi = nullptr;
@@ -844,21 +904,40 @@
   DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64)
       << induc_type;
 
-  // Generate dynamic loop peeling trip count, if needed, under the assumption
-  // that the Android runtime guarantees at least "component size" alignment:
-  // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
+  // Generate the trip count for static or dynamic loop peeling, if needed:
+  // ptc = <peeling factor>;
   HInstruction* ptc = nullptr;
-  if (vector_peeling_candidate_ != nullptr) {
-    DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
-    //
-    // TODO: Implement this. Compute address of first access memory location and
-    //       compute peeling factor to obtain kAlignedBase alignment.
-    //
-    needs_cleanup = true;
+  if (vector_static_peeling_factor_ != 0) {
+    // Static loop peeling for SIMD alignment (using the most suitable
+    // fixed peeling factor found during prior alignment analysis).
+    DCHECK(vector_dynamic_peeling_candidate_ == nullptr);
+    ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_);
+  } else if (vector_dynamic_peeling_candidate_ != nullptr) {
+    // Dynamic loop peeling for SIMD alignment (using the most suitable
+    // candidate found during prior alignment analysis):
+    // rem = offset % ALIGN;    // adjusted as #elements
+    // ptc = rem == 0 ? 0 : (ALIGN - rem);
+    uint32_t shift = DataType::SizeShift(vector_dynamic_peeling_candidate_->type);
+    uint32_t align = GetVectorSizeInBytes() >> shift;
+    uint32_t hidden_offset = HiddenOffset(vector_dynamic_peeling_candidate_->type,
+                                          vector_dynamic_peeling_candidate_->is_string_char_at);
+    HInstruction* adjusted_offset = graph_->GetConstant(induc_type, hidden_offset >> shift);
+    HInstruction* offset = Insert(preheader, new (global_allocator_) HAdd(
+        induc_type, vector_dynamic_peeling_candidate_->offset, adjusted_offset));
+    HInstruction* rem = Insert(preheader, new (global_allocator_) HAnd(
+        induc_type, offset, graph_->GetConstant(induc_type, align - 1u)));
+    HInstruction* sub = Insert(preheader, new (global_allocator_) HSub(
+        induc_type, graph_->GetConstant(induc_type, align), rem));
+    HInstruction* cond = Insert(preheader, new (global_allocator_) HEqual(
+        rem, graph_->GetConstant(induc_type, 0)));
+    ptc = Insert(preheader, new (global_allocator_) HSelect(
+        cond, graph_->GetConstant(induc_type, 0), sub, kNoDexPc));
+    needs_cleanup = true;  // don't know the exact amount
   }
 
   // Generate loop control:
   // stc = <trip-count>;
+  // ptc = min(stc, ptc);
   // vtc = stc - (stc - ptc) % chunk;
   // i = 0;
   HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
@@ -867,6 +946,10 @@
     DCHECK(IsPowerOfTwo(chunk));
     HInstruction* diff = stc;
     if (ptc != nullptr) {
+      if (trip_count == 0) {
+        HInstruction* cond = Insert(preheader, new (global_allocator_) HAboveOrEqual(stc, ptc));
+        ptc = Insert(preheader, new (global_allocator_) HSelect(cond, ptc, stc, kNoDexPc));
+      }
       diff = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, ptc));
     }
     HInstruction* rem = Insert(
@@ -889,9 +972,13 @@
     needs_cleanup = true;
   }
 
-  // Generate dynamic peeling loop for alignment, if needed:
+  // Generate alignment peeling loop, if needed:
   // for ( ; i < ptc; i += 1)
   //    <loop-body>
+  //
+  // NOTE: The alignment forced by the peeling loop is preserved even if data is
+  //       moved around during suspend checks, since all analysis was based on
+  //       nothing more than the Android runtime alignment conventions.
   if (ptc != nullptr) {
     vector_mode_ = kSequential;
     GenerateNewLoop(node,
@@ -1118,7 +1205,7 @@
         GenerateVecSub(index, offset);
         GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type);
       } else {
-        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
+        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false, is_string_char_at));
       }
       return true;
     }
@@ -1144,9 +1231,9 @@
     DataType::Type from = conversion->GetInputType();
     DataType::Type to = conversion->GetResultType();
     if (DataType::IsIntegralType(from) && DataType::IsIntegralType(to)) {
-      size_t size_vec = DataType::Size(type);
-      size_t size_from = DataType::Size(from);
-      size_t size_to = DataType::Size(to);
+      uint32_t size_vec = DataType::Size(type);
+      uint32_t size_from = DataType::Size(from);
+      uint32_t size_to = DataType::Size(to);
       // Accept an integral conversion
       // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
       // (1b) widening from at least vector type, and
@@ -1325,6 +1412,16 @@
   return false;
 }
 
+uint32_t HLoopOptimization::GetVectorSizeInBytes() {
+  switch (compiler_driver_->GetInstructionSet()) {
+    case kArm:
+    case kThumb2:
+      return 8;  // 64-bit SIMD
+    default:
+      return 16;  // 128-bit SIMD
+  }
+}
+
 bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) {
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   switch (compiler_driver_->GetInstructionSet()) {
@@ -1537,12 +1634,13 @@
   HInstruction* vector = nullptr;
   if (vector_mode_ == kVector) {
     // Vector store or load.
+    bool is_string_char_at = false;
     HInstruction* base = org->InputAt(0);
     if (opb != nullptr) {
       vector = new (global_allocator_) HVecStore(
           global_allocator_, base, opa, opb, type, org->GetSideEffects(), vector_length_, dex_pc);
     } else  {
-      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
+      is_string_char_at = org->AsArrayGet()->IsStringCharAt();
       vector = new (global_allocator_) HVecLoad(global_allocator_,
                                                 base,
                                                 opa,
@@ -1552,11 +1650,17 @@
                                                 is_string_char_at,
                                                 dex_pc);
     }
-    // Known dynamically enforced alignment?
-    if (vector_peeling_candidate_ != nullptr &&
-        vector_peeling_candidate_->base == base &&
-        vector_peeling_candidate_->offset == offset) {
-      vector->AsVecMemoryOperation()->SetAlignment(Alignment(kAlignedBase, 0));
+    // Known (forced/adjusted/original) alignment?
+    if (vector_dynamic_peeling_candidate_ != nullptr) {
+      if (vector_dynamic_peeling_candidate_->offset == offset &&  // TODO: diffs too?
+          DataType::Size(vector_dynamic_peeling_candidate_->type) == DataType::Size(type) &&
+          vector_dynamic_peeling_candidate_->is_string_char_at == is_string_char_at) {
+        vector->AsVecMemoryOperation()->SetAlignment(  // forced
+            Alignment(GetVectorSizeInBytes(), 0));
+      }
+    } else {
+      vector->AsVecMemoryOperation()->SetAlignment(  // adjusted/original
+          ComputeAlignment(offset, type, is_string_char_at, vector_static_peeling_factor_));
     }
   } else {
     // Scalar store or load.
@@ -1612,7 +1716,7 @@
     // a [initial, initial, .., initial] vector for min/max.
     HVecOperation* red_vector = new_red->AsVecOperation();
     HVecReduce::ReductionKind kind = GetReductionKind(red_vector);
-    size_t vector_length = red_vector->GetVectorLength();
+    uint32_t vector_length = red_vector->GetVectorLength();
     DataType::Type type = red_vector->GetPackedType();
     if (kind == HVecReduce::ReductionKind::kSum) {
       new_init = Insert(vector_preheader_,
@@ -1644,9 +1748,9 @@
 HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
   if (instruction->IsPhi()) {
     HInstruction* input = instruction->InputAt(1);
-    if (input->IsVecOperation()) {
+    if (input->IsVecOperation() && !input->IsVecExtractScalar()) {
       HVecOperation* input_vector = input->AsVecOperation();
-      size_t vector_length = input_vector->GetVectorLength();
+      uint32_t vector_length = input_vector->GetVectorLength();
       DataType::Type type = input_vector->GetPackedType();
       HVecReduce::ReductionKind kind = GetReductionKind(input_vector);
       HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
@@ -1774,7 +1878,7 @@
             break;
           }
           default:
-            LOG(FATAL) << "Unsupported SIMD intrinsic";
+            LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
             UNREACHABLE();
         }  // switch invoke
       } else {
@@ -2005,35 +2109,72 @@
 // Vectorization heuristics.
 //
 
+Alignment HLoopOptimization::ComputeAlignment(HInstruction* offset,
+                                              DataType::Type type,
+                                              bool is_string_char_at,
+                                              uint32_t peeling) {
+  // Combine the base alignment and hidden offset that are guaranteed by the
+  // Android runtime with a known starting index (plus peeling) scaled to bytes.
+  int64_t value = 0;
+  if (IsInt64AndGet(offset, /*out*/ &value)) {
+    uint32_t start_offset =
+        HiddenOffset(type, is_string_char_at) + (value + peeling) * DataType::Size(type);
+    return Alignment(BaseAlignment(), start_offset & (BaseAlignment() - 1u));
+  }
+  // Otherwise, the Android runtime guarantees at least natural alignment.
+  return Alignment(DataType::Size(type), 0);
+}
+
+void HLoopOptimization::SetAlignmentStrategy(uint32_t peeling_votes[],
+                                             const ArrayReference* peeling_candidate) {
+  // Current heuristic: pick the best static loop peeling factor, if any,
+  // or otherwise use dynamic loop peeling on suggested peeling candidate.
+  uint32_t max_vote = 0;
+  for (int32_t i = 0; i < 16; i++) {  // one vote slot per peeling factor 0..15
+    if (peeling_votes[i] > max_vote) {
+      max_vote = peeling_votes[i];
+      vector_static_peeling_factor_ = i;
+    }
+  }
+  if (max_vote == 0) {  // no static votes: fall back to dynamic candidate (may be null)
+    vector_dynamic_peeling_candidate_ = peeling_candidate;
+  }
+}
+
+uint32_t HLoopOptimization::MaxNumberPeeled() {
+  if (vector_dynamic_peeling_candidate_ != nullptr) {
+    return vector_length_ - 1u;  // worst-case
+  }
+  return vector_static_peeling_factor_;  // known exactly
+}
+
 bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
-  // Current heuristic: non-empty body with sufficient number
-  // of iterations (if known).
+  // Current heuristic: non-empty body with sufficient number of iterations (if known).
   // TODO: refine by looking at e.g. operation count, alignment, etc.
+  // TODO: trip count is really an unsigned entity, provided the guarding test
+  //       is satisfied; deal with this more carefully later.
+  uint32_t max_peel = MaxNumberPeeled();
   if (vector_length_ == 0) {
     return false;  // nothing found
-  } else if (0 < trip_count && trip_count < vector_length_) {
+  } else if (trip_count < 0) {
+    return false;  // guard against non-taken/large
+  } else if ((0 < trip_count) && (trip_count < (vector_length_ + max_peel))) {
     return false;  // insufficient iterations
   }
   return true;
 }
 
-void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate,
-                                            int64_t trip_count ATTRIBUTE_UNUSED) {
-  // Current heuristic: none.
-  // TODO: implement
-  vector_peeling_candidate_ = candidate;
-}
-
 static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
 static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
 
 uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
+  uint32_t max_peel = MaxNumberPeeled();
   switch (compiler_driver_->GetInstructionSet()) {
     case kArm64: {
       // Don't unroll with insufficient iterations.
       // TODO: Unroll loops with unknown trip count.
       DCHECK_NE(vector_length_, 0u);
-      if (trip_count < 2 * vector_length_) {
+      if (trip_count < (2 * vector_length_ + max_peel)) {
         return kNoUnrollingFactor;
       }
       // Don't unroll for large loop body size.
@@ -2045,7 +2186,7 @@
       //  - At least one iteration of the transformed loop should be executed.
       //  - The loop body shouldn't be "too big" (heuristic).
       uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
-      uint32_t uf2 = trip_count / vector_length_;
+      uint32_t uf2 = (trip_count - max_peel) / vector_length_;
       uint32_t unroll_factor =
           TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
       DCHECK_GE(unroll_factor, 1u);
@@ -2112,7 +2253,7 @@
     HInstruction* reduction = inputs[1];
     if (HasReductionFormat(reduction, phi)) {
       HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
-      int32_t use_count = 0;
+      uint32_t use_count = 0;
       bool single_use_inside_loop =
           // Reduction update only used by phi.
           reduction->GetUses().HasExactlyOneElement() &&
@@ -2205,7 +2346,7 @@
 bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                                             HInstruction* instruction,
                                             bool collect_loop_uses,
-                                            /*out*/ int32_t* use_count) {
+                                            /*out*/ uint32_t* use_count) {
   // Deal with regular uses.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     HInstruction* user = use.GetUser();
@@ -2276,7 +2417,7 @@
   // Assigning the last value is always successful if there are no uses.
   // Otherwise, it succeeds in a no early-exit loop by generating the
   // proper last value assignment.
-  int32_t use_count = 0;
+  uint32_t use_count = 0;
   return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
       (use_count == 0 ||
        (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(loop_info, instruction, block)));
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 768fe55..51e0a98 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -94,20 +94,24 @@
    * Representation of a unit-stride array reference.
    */
   struct ArrayReference {
-    ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l)
-        : base(b), offset(o), type(t), lhs(l) { }
+    ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false)
+        : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { }
     bool operator<(const ArrayReference& other) const {
       return
           (base < other.base) ||
           (base == other.base &&
            (offset < other.offset || (offset == other.offset &&
                                       (type < other.type ||
-                                       (type == other.type && lhs < other.lhs)))));
+                                       (type == other.type &&
+                                        (lhs < other.lhs ||
+                                         (lhs == other.lhs &&
+                                          is_string_char_at < other.is_string_char_at)))))));
     }
-    HInstruction* base;    // base address
-    HInstruction* offset;  // offset + i
-    DataType::Type type;   // component type
-    bool lhs;              // def/use
+    HInstruction* base;      // base address
+    HInstruction* offset;    // offset + i
+    DataType::Type type;     // component type
+    bool lhs;                // def/use
+    bool is_string_char_at;  // compressed string read
   };
 
   //
@@ -152,6 +156,7 @@
                     bool generate_code,
                     DataType::Type type,
                     uint64_t restrictions);
+  uint32_t GetVectorSizeInBytes();
   bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions);
   bool TrySetVectorLength(uint32_t length);
   void GenerateVecInv(HInstruction* org, DataType::Type type);
@@ -183,8 +188,14 @@
                          uint64_t restrictions);
 
   // Vectorization heuristics.
+  Alignment ComputeAlignment(HInstruction* offset,
+                             DataType::Type type,
+                             bool is_string_char_at,
+                             uint32_t peeling = 0);
+  void SetAlignmentStrategy(uint32_t peeling_votes[],
+                            const ArrayReference* peeling_candidate);
+  uint32_t MaxNumberPeeled();
   bool IsVectorizationProfitable(int64_t trip_count);
-  void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
   uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
 
   //
@@ -202,7 +213,7 @@
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
                            bool collect_loop_uses,
-                           /*out*/ int32_t* use_count);
+                           /*out*/ uint32_t* use_count);
   bool IsUsedOutsideLoop(HLoopInformation* loop_info,
                          HInstruction* instruction);
   bool TryReplaceWithLastValue(HLoopInformation* loop_info,
@@ -254,8 +265,9 @@
   // Contents reside in phase-local heap memory.
   ScopedArenaSet<ArrayReference>* vector_refs_;
 
-  // Dynamic loop peeling candidate for alignment.
-  const ArrayReference* vector_peeling_candidate_;
+  // Static or dynamic loop peeling for alignment.
+  uint32_t vector_static_peeling_factor_;
+  const ArrayReference* vector_dynamic_peeling_candidate_;
 
   // Dynamic data dependence test of the form a != b.
   HInstruction* vector_runtime_test_a_;
diff --git a/dexoptanalyzer/dexoptanalyzer.cc b/dexoptanalyzer/dexoptanalyzer.cc
index 08d38d5..3a2c7fc 100644
--- a/dexoptanalyzer/dexoptanalyzer.cc
+++ b/dexoptanalyzer/dexoptanalyzer.cc
@@ -19,6 +19,7 @@
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 #include "compiler_filter.h"
+#include "class_loader_context.h"
 #include "dex_file.h"
 #include "noop_compiler_callbacks.h"
 #include "oat_file_assistant.h"
@@ -175,7 +176,15 @@
         oat_fd_ = std::stoi(option.substr(strlen("--oat-fd=")).ToString(), nullptr, 0);
       } else if (option.starts_with("--vdex-fd")) {
         vdex_fd_ = std::stoi(option.substr(strlen("--vdex-fd=")).ToString(), nullptr, 0);
-      } else { Usage("Unknown argument '%s'", option.data()); }
+      } else if (option.starts_with("--class-loader-context=")) {
+        std::string context_str = option.substr(strlen("--class-loader-context=")).ToString();
+        class_loader_context_ = ClassLoaderContext::Create(context_str);
+        if (class_loader_context_ == nullptr) {
+          Usage("Invalid --class-loader-context '%s'", context_str.c_str());
+        }
+      } else {
+        Usage("Unknown argument '%s'", option.data());
+      }
     }
 
     if (image_.empty()) {
@@ -255,9 +264,8 @@
       return kNoDexOptNeeded;
     }
 
-    // TODO(calin): Pass the class loader context as an argument to dexoptanalyzer. b/62269291.
     int dexoptNeeded = oat_file_assistant->GetDexOptNeeded(
-        compiler_filter_, assume_profile_changed_, downgrade_);
+        compiler_filter_, assume_profile_changed_, downgrade_, class_loader_context_.get());
 
     // Convert OatFileAssitant codes to dexoptanalyzer codes.
     switch (dexoptNeeded) {
@@ -280,6 +288,7 @@
   std::string dex_file_;
   InstructionSet isa_;
   CompilerFilter::Filter compiler_filter_;
+  std::unique_ptr<ClassLoaderContext> class_loader_context_;
   bool assume_profile_changed_;
   bool downgrade_;
   std::string image_;
diff --git a/openjdkjvmti/ti_method.cc b/openjdkjvmti/ti_method.cc
index cf93bf0..5d63285 100644
--- a/openjdkjvmti/ti_method.cc
+++ b/openjdkjvmti/ti_method.cc
@@ -771,7 +771,7 @@
   }
   GetLocalVariableClosure c(self, depth, slot, type, val);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
@@ -900,7 +900,7 @@
   }
   SetLocalVariableClosure c(self, depth, slot, type, val);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
@@ -959,7 +959,7 @@
   }
   GetLocalInstanceClosure c(self, depth, data);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
diff --git a/openjdkjvmti/ti_monitor.cc b/openjdkjvmti/ti_monitor.cc
index 7db0566..5881f8c 100644
--- a/openjdkjvmti/ti_monitor.cc
+++ b/openjdkjvmti/ti_monitor.cc
@@ -395,7 +395,7 @@
   };
   GetContendedMonitorClosure closure(self, monitor);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &closure)) {
+  if (!target->RequestSynchronousCheckpoint(&closure)) {
     return ERR(THREAD_NOT_ALIVE);
   }
   return OK;
diff --git a/openjdkjvmti/ti_stack.cc b/openjdkjvmti/ti_stack.cc
index b43eaa0..e346e16 100644
--- a/openjdkjvmti/ti_stack.cc
+++ b/openjdkjvmti/ti_stack.cc
@@ -257,7 +257,7 @@
                                        static_cast<size_t>(start_depth),
                                        static_cast<size_t>(max_frame_count));
     // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-    if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+    if (!thread->RequestSynchronousCheckpoint(&closure)) {
       return ERR(THREAD_NOT_ALIVE);
     }
     *count_ptr = static_cast<jint>(closure.index);
@@ -268,7 +268,7 @@
   } else {
     GetStackTraceVectorClosure closure(0, 0);
     // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-    if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+    if (!thread->RequestSynchronousCheckpoint(&closure)) {
       return ERR(THREAD_NOT_ALIVE);
     }
 
@@ -712,7 +712,7 @@
 
   GetFrameCountClosure closure;
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+  if (!thread->RequestSynchronousCheckpoint(&closure)) {
     return ERR(THREAD_NOT_ALIVE);
   }
 
@@ -802,7 +802,7 @@
 
   GetLocationClosure closure(static_cast<size_t>(depth));
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+  if (!thread->RequestSynchronousCheckpoint(&closure)) {
     return ERR(THREAD_NOT_ALIVE);
   }
 
@@ -923,7 +923,7 @@
     if (target != self) {
       called_method = true;
       // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-      if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &closure)) {
+      if (!target->RequestSynchronousCheckpoint(&closure)) {
         return ERR(THREAD_NOT_ALIVE);
       }
     } else {
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 6d075a6..99dea54 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -38,9 +38,6 @@
 #include "base/mutex.h"
 #include "events-inl.h"
 #include "gc/system_weak.h"
-#include "gc/collector_type.h"
-#include "gc/gc_cause.h"
-#include "gc/scoped_gc_critical_section.h"
 #include "gc_root-inl.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
@@ -1064,7 +1061,7 @@
   };
   StopThreadClosure c(exc);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (target->RequestSynchronousCheckpoint(&c)) {
     return OK;
   } else {
     // Something went wrong, probably the thread died.
@@ -1087,29 +1084,4 @@
   return OK;
 }
 
-class GcCriticalSectionClosure : public art::Closure {
- public:
-  explicit GcCriticalSectionClosure(art::Closure* wrapped) : wrapped_(wrapped) {}
-
-  void Run(art::Thread* self) OVERRIDE {
-    if (art::kIsDebugBuild) {
-      art::Locks::thread_list_lock_->AssertNotHeld(art::Thread::Current());
-    }
-    // This might block as it waits for any in-progress GCs to finish but this is fine since we
-    // released the Thread-list-lock prior to calling this in RequestSynchronousCheckpoint.
-    art::gc::ScopedGCCriticalSection sgccs(art::Thread::Current(),
-                                           art::gc::kGcCauseDebugger,
-                                           art::gc::kCollectorTypeDebugger);
-    wrapped_->Run(self);
-  }
-
- private:
-  art::Closure* wrapped_;
-};
-
-bool ThreadUtil::RequestGCSafeSynchronousCheckpoint(art::Thread* thr, art::Closure* function) {
-  GcCriticalSectionClosure gccsc(function);
-  return thr->RequestSynchronousCheckpoint(&gccsc);
-}
-
 }  // namespace openjdkjvmti
diff --git a/openjdkjvmti/ti_thread.h b/openjdkjvmti/ti_thread.h
index 341bffe..09b4cab 100644
--- a/openjdkjvmti/ti_thread.h
+++ b/openjdkjvmti/ti_thread.h
@@ -42,7 +42,6 @@
 class ArtField;
 class ScopedObjectAccessAlreadyRunnable;
 class Thread;
-class Closure;
 }  // namespace art
 
 namespace openjdkjvmti {
@@ -134,16 +133,6 @@
     REQUIRES(!art::Locks::user_code_suspension_lock_,
              !art::Locks::thread_suspend_count_lock_);
 
-  // This will request a synchronous checkpoint in such a way as to prevent gc races if a local
-  // variable is taken from one thread's stack and placed in the stack of another thread.
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution. This is
-  // due to the fact that Thread::Current() needs to go to sleep to allow the targeted thread to
-  // execute the checkpoint for us if it is Runnable.
-  static bool RequestGCSafeSynchronousCheckpoint(art::Thread* thr, art::Closure* function)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      RELEASE(art::Locks::thread_list_lock_)
-      REQUIRES(!art::Locks::thread_suspend_count_lock_);
-
  private:
   // We need to make sure only one thread tries to suspend threads at a time so we can get the
   // 'suspend-only-once' behavior the spec requires. Internally, ART considers suspension to be a
diff --git a/runtime/Android.bp b/runtime/Android.bp
index a615437..3258aae 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -203,6 +203,7 @@
         "runtime.cc",
         "runtime_callbacks.cc",
         "runtime_common.cc",
+        "runtime_intrinsics.cc",
         "runtime_options.cc",
         "scoped_thread_state_change.cc",
         "signal_catcher.cc",
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index adfc88f..280e593 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2921,7 +2921,7 @@
     INCREASE_FRAME 16                             // Reserve space for JValue result.
     str     xzr, [sp, #0]                         // Initialize result to zero.
     mov     x0, sp                                // Set r0 to point to result.
-    bl      artInvokePolymorphic                  // ArtInvokePolymorphic(result, receiver, thread, save_area)
+    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, thread, save_area)
     uxtb    w0, w0                                // Result is the return type descriptor as a char.
     sub     w0, w0, 'A'                           // Convert to zero based index.
     cmp     w0, 'Z' - 'A'
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ee3f17d..489c52c 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -3228,7 +3228,7 @@
     sw    $zero, 20($sp)                      # Initialize JValue result.
     sw    $zero, 16($sp)
     la    $t9, artInvokePolymorphic
-    jalr  $t9                                 # (result, receiver, Thread*, context)
+    jalr  $t9                                 # artInvokePolymorphic(result, receiver, Thread*, context)
     addiu $a0, $sp, 16                        # Make $a0 a pointer to the JValue result
 .macro MATCH_RETURN_TYPE c, handler
     li    $t0, \c
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index d4ad275..98ffe65 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -3028,7 +3028,7 @@
     daddiu $sp, $sp, -8                        # Reserve space for JValue result.
     .cfi_adjust_cfa_offset 8
     sd     $zero, 0($sp)                       # Initialize JValue result.
-    jal    artInvokePolymorphic                # (result, receiver, Thread*, context)
+    jal    artInvokePolymorphic                # artInvokePolymorphic(result, receiver, Thread*, context)
     move   $a0, $sp                            # Make $a0 a pointer to the JValue result
 .macro MATCH_RETURN_TYPE c, handler
     li     $t0, \c
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index eecca58..25716dc 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2419,7 +2419,7 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                                       // pass receiver (method handle)
     PUSH eax                                       // pass JResult
-    call SYMBOL(artInvokePolymorphic)              // (result, receiver, Thread*, SP)
+    call SYMBOL(artInvokePolymorphic)              // artInvokePolymorphic(result, receiver, Thread*, SP)
     subl LITERAL('A'), %eax                        // Eliminate out of bounds options
     cmpb LITERAL('Z' - 'A'), %al
     ja .Lcleanup_and_return
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index d4297df..80f5c34 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -166,6 +166,8 @@
     return kInterface;
   } else if (IsDirect()) {
     return kDirect;
+  } else if (IsPolymorphicSignature()) {
+    return kPolymorphic;
   } else {
     return kVirtual;
   }
@@ -427,6 +429,12 @@
                          /* lookup_in_resolved_boot_classes */ true);
 }
 
+bool ArtMethod::IsAnnotatedWithPolymorphicSignature() {
+  return IsAnnotatedWith(WellKnownClasses::java_lang_invoke_MethodHandle_PolymorphicSignature,
+                         DexFile::kDexVisibilityRuntime,
+                         /* lookup_in_resolved_boot_classes */ true);
+}
+
 bool ArtMethod::IsAnnotatedWith(jclass klass,
                                 uint32_t visibility,
                                 bool lookup_in_resolved_boot_classes) {
diff --git a/runtime/art_method.h b/runtime/art_method.h
index caef81c..fe85cb4 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -271,6 +271,12 @@
 
   bool IsProxyMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  bool IsPolymorphicSignature() REQUIRES_SHARED(Locks::mutator_lock_) {
+    // Methods with a polymorphic signature are required to be native and varargs.
+    // Check these first, before the possibly expensive annotation lookup.
+    return IsNative() && IsVarargs() && IsAnnotatedWithPolymorphicSignature();
+  }
+
   bool SkipAccessChecks() {
     return (GetAccessFlags() & kAccSkipAccessChecks) != 0;
   }
@@ -316,6 +322,10 @@
   // -- Unrelated to the GC notion of "critical".
   bool IsAnnotatedWithCriticalNative();
 
+  // Checks to see if the method was annotated with
+  // @java.lang.invoke.MethodHandle.PolymorphicSignature.
+  bool IsAnnotatedWithPolymorphicSignature();
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index cf5cc11..4d7c2a1 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2574,7 +2574,7 @@
 // each type.
 extern "C" uintptr_t artInvokePolymorphic(
     JValue* result,
-    mirror::Object* raw_method_handle,
+    mirror::Object* raw_receiver,
     Thread* self,
     ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -2602,26 +2602,29 @@
   RememberForGcArgumentVisitor gc_visitor(sp, kMethodIsStatic, shorty, shorty_length, &soa);
   gc_visitor.VisitArguments();
 
-  // Wrap raw_method_handle in a Handle for safety.
-  StackHandleScope<2> hs(self);
-  Handle<mirror::MethodHandle> method_handle(
-      hs.NewHandle(ObjPtr<mirror::MethodHandle>::DownCast(MakeObjPtr(raw_method_handle))));
-  raw_method_handle = nullptr;
+  // Wrap raw_receiver in a Handle for safety.
+  StackHandleScope<3> hs(self);
+  Handle<mirror::Object> receiver_handle(hs.NewHandle(raw_receiver));
+  raw_receiver = nullptr;
   self->EndAssertNoThreadSuspension(old_cause);
 
-  // Resolve method - it's either MethodHandle.invoke() or MethodHandle.invokeExact().
+  // Resolve method.
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   ArtMethod* resolved_method = linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
       self, inst.VRegB(), caller_method, kVirtual);
-  DCHECK((resolved_method ==
-          jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact)) ||
-         (resolved_method ==
-          jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invoke)));
-  if (UNLIKELY(method_handle.IsNull())) {
+
+  if (UNLIKELY(receiver_handle.IsNull())) {
     ThrowNullPointerExceptionForMethodAccess(resolved_method, InvokeType::kVirtual);
     return static_cast<uintptr_t>('V');
   }
 
+  // TODO(oth): Ensure this path isn't taken for VarHandle accessors (b/65872996).
+  DCHECK_EQ(resolved_method->GetDeclaringClass(),
+            WellKnownClasses::ToClass(WellKnownClasses::java_lang_invoke_MethodHandle));
+
+  Handle<mirror::MethodHandle> method_handle(hs.NewHandle(
+      ObjPtr<mirror::MethodHandle>::DownCast(MakeObjPtr(receiver_handle.Get()))));
+
   Handle<mirror::MethodType> method_type(
       hs.NewHandle(linker->ResolveMethodType(self, proto_idx, caller_method)));
 
@@ -2662,16 +2665,28 @@
   // consecutive order.
   uint32_t unused_args[Instruction::kMaxVarArgRegs] = {};
   uint32_t first_callee_arg = first_arg + 1;
-  if (!DoInvokePolymorphic<true /* is_range */>(self,
-                                                resolved_method,
-                                                *shadow_frame,
-                                                method_handle,
-                                                method_type,
-                                                unused_args,
-                                                first_callee_arg,
-                                                result)) {
-    DCHECK(self->IsExceptionPending());
+
+  bool isExact = (jni::EncodeArtMethod(resolved_method) ==
+                  WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact);
+  bool success = false;
+  if (isExact) {
+    success = MethodHandleInvokeExact<true/*is_range*/>(self,
+                                                        *shadow_frame,
+                                                        method_handle,
+                                                        method_type,
+                                                        unused_args,
+                                                        first_callee_arg,
+                                                        result);
+  } else {
+    success = MethodHandleInvoke<true/*is_range*/>(self,
+                                                   *shadow_frame,
+                                                   method_handle,
+                                                   method_type,
+                                                   unused_args,
+                                                   first_callee_arg,
+                                                   result);
   }
+  DCHECK(success || self->IsExceptionPending());
 
   // Pop transition record.
   self->PopManagedStackFragment(fragment);
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 9fb9fe7..0a1ae36 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -22,6 +22,7 @@
 #include "debugger.h"
 #include "dex_file_types.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "intrinsics_enum.h"
 #include "jit/jit.h"
 #include "jvalue.h"
 #include "method_handles-inl.h"
@@ -588,11 +589,12 @@
 }
 
 template<bool is_range>
-bool DoInvokePolymorphic(Thread* self,
-                         ShadowFrame& shadow_frame,
-                         const Instruction* inst,
-                         uint16_t inst_data,
-                         JValue* result)
+static bool DoMethodHandleInvokeCommon(Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       bool invoke_exact,
+                                       const Instruction* inst,
+                                       uint16_t inst_data,
+                                       JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Make sure to check for async exceptions
   if (UNLIKELY(self->ObserveAsyncException())) {
@@ -638,41 +640,381 @@
     return false;
   }
 
-  ArtMethod* invoke_method =
-      class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
-          self, invoke_method_idx, shadow_frame.GetMethod(), kVirtual);
-
   // There is a common dispatch method for method handles that takes
   // arguments either from a range or an array of arguments depending
   // on whether the DEX instruction is invoke-polymorphic/range or
   // invoke-polymorphic. The array here is for the latter.
   uint32_t args[Instruction::kMaxVarArgRegs] = {};
-  if (is_range) {
+  if (UNLIKELY(is_range)) {
     // VRegC is the register holding the method handle. Arguments passed
     // to the method handle's target do not include the method handle.
     uint32_t first_arg = inst->VRegC_4rcc() + 1;
-    return DoInvokePolymorphic<is_range>(self,
-                                         invoke_method,
-                                         shadow_frame,
-                                         method_handle,
-                                         callsite_type,
-                                         args /* unused */,
-                                         first_arg,
-                                         result);
+    static const bool kIsRange = true;
+    if (invoke_exact) {
+      return art::MethodHandleInvokeExact<kIsRange>(self,
+                                                    shadow_frame,
+                                                    method_handle,
+                                                    callsite_type,
+                                                    args /* unused */,
+                                                    first_arg,
+                                                    result);
+    } else {
+      return art::MethodHandleInvoke<kIsRange>(self,
+                                               shadow_frame,
+                                               method_handle,
+                                               callsite_type,
+                                               args /* unused */,
+                                               first_arg,
+                                               result);
+    }
   } else {
     // Get the register arguments for the invoke.
     inst->GetVarArgs(args, inst_data);
     // Drop the first register which is the method handle performing the invoke.
     memmove(args, args + 1, sizeof(args[0]) * (Instruction::kMaxVarArgRegs - 1));
     args[Instruction::kMaxVarArgRegs - 1] = 0;
-    return DoInvokePolymorphic<is_range>(self,
-                                         invoke_method,
-                                         shadow_frame,
-                                         method_handle,
-                                         callsite_type,
-                                         args,
-                                         args[0],
-                                         result);
+    static const bool kIsRange = false;
+    if (invoke_exact) {
+      return art::MethodHandleInvokeExact<kIsRange>(self,
+                                                    shadow_frame,
+                                                    method_handle,
+                                                    callsite_type,
+                                                    args,
+                                                    args[0],
+                                                    result);
+    } else {
+      return art::MethodHandleInvoke<kIsRange>(self,
+                                               shadow_frame,
+                                               method_handle,
+                                               callsite_type,
+                                               args,
+                                               args[0],
+                                               result);
+    }
+  }
+}
+
+bool DoMethodHandleInvokeExact(Thread* self,
+                               ShadowFrame& shadow_frame,
+                               const Instruction* inst,
+                               uint16_t inst_data,
+                               JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC) {
+    static const bool kIsRange = false;
+    return DoMethodHandleInvokeCommon<kIsRange>(
+        self, shadow_frame, true /* invoke_exact */, inst, inst_data, result);
+  } else {
+    DCHECK_EQ(inst->Opcode(), Instruction::INVOKE_POLYMORPHIC_RANGE);
+    static const bool kIsRange = true;
+    return DoMethodHandleInvokeCommon<kIsRange>(
+        self, shadow_frame, true /* invoke_exact */, inst, inst_data, result);
+  }
+}
+
+bool DoMethodHandleInvoke(Thread* self,
+                          ShadowFrame& shadow_frame,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC) {
+    static const bool kIsRange = false;
+    return DoMethodHandleInvokeCommon<kIsRange>(
+        self, shadow_frame, false /* invoke_exact */, inst, inst_data, result);
+  } else {
+    DCHECK_EQ(inst->Opcode(), Instruction::INVOKE_POLYMORPHIC_RANGE);
+    static const bool kIsRange = true;
+    return DoMethodHandleInvokeCommon<kIsRange>(
+        self, shadow_frame, false /* invoke_exact */, inst, inst_data, result);
+  }
+}
+
+static bool UnimplementedSignaturePolymorphicMethod(Thread* self ATTRIBUTE_UNUSED,
+                                                    ShadowFrame& shadow_frame ATTRIBUTE_UNUSED,
+                                                    const Instruction* inst ATTRIBUTE_UNUSED,
+                                                    uint16_t inst_data ATTRIBUTE_UNUSED,
+                                                    JValue* result ATTRIBUTE_UNUSED)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  UNIMPLEMENTED(FATAL) << "TODO(oth): b/65872996";
+  return false;
+}
+
+bool DoVarHandleCompareAndExchange(Thread* self,
+                                   ShadowFrame& shadow_frame,
+                                   const Instruction* inst,
+                                   uint16_t inst_data,
+                                   JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleCompareAndExchangeAcquire(Thread* self,
+                                          ShadowFrame& shadow_frame,
+                                          const Instruction* inst,
+                                          uint16_t inst_data,
+                                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleCompareAndExchangeRelease(Thread* self,
+                                          ShadowFrame& shadow_frame,
+                                          const Instruction* inst,
+                                          uint16_t inst_data,
+                                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleCompareAndSet(Thread* self,
+                              ShadowFrame& shadow_frame,
+                              const Instruction* inst,
+                              uint16_t inst_data,
+                              JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGet(Thread* self,
+                    ShadowFrame& shadow_frame,
+                    const Instruction* inst,
+                    uint16_t inst_data,
+                    JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAcquire(Thread* self,
+                           ShadowFrame& shadow_frame,
+                           const Instruction* inst,
+                           uint16_t inst_data,
+                           JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndAdd(Thread* self,
+                          ShadowFrame& shadow_frame,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndAddAcquire(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndAddRelease(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseAnd(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseAndAcquire(Thread* self,
+                                        ShadowFrame& shadow_frame,
+                                        const Instruction* inst,
+                                        uint16_t inst_data,
+                                        JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseAndRelease(Thread* self,
+                                        ShadowFrame& shadow_frame,
+                                        const Instruction* inst,
+                                        uint16_t inst_data,
+                                        JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseOr(Thread* self,
+                                ShadowFrame& shadow_frame,
+                                const Instruction* inst,
+                                uint16_t inst_data,
+                                JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseOrAcquire(Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       const Instruction* inst,
+                                       uint16_t inst_data,
+                                       JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseOrRelease(Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       const Instruction* inst,
+                                       uint16_t inst_data,
+                                       JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseXor(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseXorAcquire(Thread* self,
+                                        ShadowFrame& shadow_frame,
+                                        const Instruction* inst,
+                                        uint16_t inst_data,
+                                        JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndBitwiseXorRelease(Thread* self,
+                                        ShadowFrame& shadow_frame,
+                                        const Instruction* inst,
+                                        uint16_t inst_data,
+                                        JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndSet(Thread* self,
+                          ShadowFrame& shadow_frame,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndSetAcquire(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetAndSetRelease(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data,
+                                 JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetOpaque(Thread* self,
+                          ShadowFrame& shadow_frame,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleGetVolatile(Thread* self,
+                            ShadowFrame& shadow_frame,
+                            const Instruction* inst,
+                            uint16_t inst_data,
+                            JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleSet(Thread* self,
+                    ShadowFrame& shadow_frame,
+                    const Instruction* inst,
+                    uint16_t inst_data,
+                    JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleSetOpaque(Thread* self,
+                          ShadowFrame& shadow_frame,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleSetRelease(Thread* self,
+                           ShadowFrame& shadow_frame,
+                           const Instruction* inst,
+                           uint16_t inst_data,
+                           JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleSetVolatile(Thread* self,
+                            ShadowFrame& shadow_frame,
+                            const Instruction* inst,
+                            uint16_t inst_data,
+                            JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleWeakCompareAndSet(Thread* self,
+                                  ShadowFrame& shadow_frame,
+                                  const Instruction* inst,
+                                  uint16_t inst_data,
+                                  JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleWeakCompareAndSetAcquire(Thread* self,
+                                         ShadowFrame& shadow_frame,
+                                         const Instruction* inst,
+                                         uint16_t inst_data,
+                                         JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleWeakCompareAndSetPlain(Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       const Instruction* inst,
+                                       uint16_t inst_data,
+                                       JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+bool DoVarHandleWeakCompareAndSetRelease(Thread* self,
+                                         ShadowFrame& shadow_frame,
+                                         const Instruction* inst,
+                                         uint16_t inst_data,
+                                         JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return UnimplementedSignaturePolymorphicMethod(self, shadow_frame, inst, inst_data, result);
+}
+
+template<bool is_range>
+bool DoInvokePolymorphic(Thread* self,
+                         ShadowFrame& shadow_frame,
+                         const Instruction* inst,
+                         uint16_t inst_data,
+                         JValue* result) {
+  const int invoke_method_idx = inst->VRegB();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtMethod* invoke_method =
+      class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
+          self, invoke_method_idx, shadow_frame.GetMethod(), kVirtual);
+
+  // Ensure intrinsic identifiers are initialized.
+  DCHECK(invoke_method->IsIntrinsic());
+
+  // Dispatch based on intrinsic identifier associated with method.
+  switch (static_cast<art::Intrinsics>(invoke_method->GetIntrinsic())) {
+#define CASE_SIGNATURE_POLYMORPHIC_INTRINSIC(Name, ...) \
+    case Intrinsics::k##Name:                           \
+      return Do ## Name(self, shadow_frame, inst, inst_data, result);
+#include "intrinsics_list.h"
+    SIGNATURE_POLYMORPHIC_INTRINSICS_LIST(CASE_SIGNATURE_POLYMORPHIC_INTRINSIC)
+#undef INTRINSICS_LIST
+#undef SIGNATURE_POLYMORPHIC_INTRINSICS_LIST
+#undef CASE_SIGNATURE_POLYMORPHIC_INTRINSIC
+    default:
+      LOG(FATAL) << "Unreachable: " << invoke_method->GetIntrinsic();
+      UNREACHABLE();
+      return false;
   }
 }
 
@@ -839,19 +1181,16 @@
   // Invoke the bootstrap method handle.
   JValue result;
 
-  // This array of arguments is unused. DoInvokePolymorphic() operates on either a
+  // This array of arguments is unused. MethodHandleInvokeExact() operates on either
   // an argument array or a range, but always takes an array argument.
   uint32_t args_unused[Instruction::kMaxVarArgRegs];
-  ArtMethod* invoke_exact =
-      jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact);
-  bool invoke_success = DoInvokePolymorphic<true /* is_range */>(self,
-                                                                 invoke_exact,
-                                                                 *bootstrap_frame,
-                                                                 bootstrap,
-                                                                 bootstrap_method_type,
-                                                                 args_unused,
-                                                                 0,
-                                                                 &result);
+  bool invoke_success = art::MethodHandleInvokeExact<true /* is_range */>(self,
+                                                                          *bootstrap_frame,
+                                                                          bootstrap,
+                                                                          bootstrap_method_type,
+                                                                          args_unused,
+                                                                          0,
+                                                                          &result);
   if (!invoke_success) {
     DCHECK(self->IsExceptionPending());
     return nullptr;
@@ -942,16 +1281,13 @@
     inst->GetVarArgs(args, inst_data);
   }
 
-  ArtMethod* invoke_exact =
-      jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact);
-  return DoInvokePolymorphic<is_range>(self,
-                                       invoke_exact,
-                                       shadow_frame,
-                                       target,
-                                       target_method_type,
-                                       args,
-                                       args[0],
-                                       result);
+  return art::MethodHandleInvokeExact<is_range>(self,
+                                                shadow_frame,
+                                                target,
+                                                target_method_type,
+                                                args,
+                                                args[0],
+                                                result);
 }
 
 template <bool is_range>
@@ -1344,16 +1680,6 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
-// Explicit DoInvokeCustom template function declarations.
-#define EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(_is_range)               \
-  template REQUIRES_SHARED(Locks::mutator_lock_)                         \
-  bool DoInvokeCustom<_is_range>(                                        \
-      Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,  \
-      uint16_t inst_data, JValue* result)
-EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(false);
-EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(true);
-#undef EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL
-
 // Explicit DoInvokePolymorphic template function declarations.
 #define EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL(_is_range)          \
   template REQUIRES_SHARED(Locks::mutator_lock_)                         \
@@ -1364,6 +1690,16 @@
 EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL(true);
 #undef EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL
 
+// Explicit DoInvokeCustom template function declarations.
+#define EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(_is_range)               \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                         \
+  bool DoInvokeCustom<_is_range>(                                        \
+      Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,  \
+      uint16_t inst_data, JValue* result)
+EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(false);
+EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL(true);
+#undef EXPLICIT_DO_INVOKE_CUSTOM_TEMPLATE_DECL
+
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template REQUIRES_SHARED(Locks::mutator_lock_)                                                  \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index e7f67eb..f097bc7 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -222,7 +222,18 @@
   return class_linker->ResolveMethodType(self, method_type_index, referrer);
 }
 
-// Performs a signature polymorphic invoke (invoke-polymorphic/invoke-polymorphic-range).
+#define DECLARE_SIGNATURE_POLYMORPHIC_HANDLER(Name, ...)              \
+bool Do ## Name(Thread* self,                                         \
+                ShadowFrame& shadow_frame,                            \
+                const Instruction* inst,                              \
+                uint16_t inst_data,                                   \
+                JValue* result) REQUIRES_SHARED(Locks::mutator_lock_);
+#include "intrinsics_list.h"
+INTRINSICS_LIST(DECLARE_SIGNATURE_POLYMORPHIC_HANDLER)
+#undef INTRINSICS_LIST
+#undef DECLARE_SIGNATURE_POLYMORPHIC_HANDLER
+
+// Performs an invoke-polymorphic or invoke-polymorphic-range.
 template<bool is_range>
 bool DoInvokePolymorphic(Thread* self,
                          ShadowFrame& shadow_frame,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 26de6b4..37593bc 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -16,8 +16,8 @@
 
 #include "interpreter/interpreter_intrinsics.h"
 
-#include "compiler/intrinsics_enum.h"
 #include "dex_instruction.h"
+#include "intrinsics_enum.h"
 #include "interpreter/interpreter_common.h"
 
 namespace art {
@@ -323,14 +323,14 @@
   return true;
 }
 
-#define VARHANDLE_FENCE_INTRINSIC(name, std_memory_operation)   \
-static ALWAYS_INLINE bool name(ShadowFrame* /* shadow_frame */, \
-                               const Instruction* /* inst */,   \
-                               uint16_t /* inst_data */,        \
-                               JValue* /* result_register */)   \
-    REQUIRES_SHARED(Locks::mutator_lock_) {                     \
-    std::atomic_thread_fence(std_memory_operation);             \
-    return true;                                                \
+#define VARHANDLE_FENCE_INTRINSIC(name, std_memory_operation)              \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame ATTRIBUTE_UNUSED, \
+                               const Instruction* inst ATTRIBUTE_UNUSED,   \
+                               uint16_t inst_data ATTRIBUTE_UNUSED,        \
+                               JValue* result_register ATTRIBUTE_UNUSED)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                                \
+  std::atomic_thread_fence(std_memory_operation);                          \
+  return true;                                                             \
 }
 
 // The VarHandle fence methods are static (unlike sun.misc.Unsafe versions).
@@ -342,6 +342,63 @@
 VARHANDLE_FENCE_INTRINSIC(MterpVarHandleLoadLoadFence, std::memory_order_acquire)
 VARHANDLE_FENCE_INTRINSIC(MterpVarHandleStoreStoreFence, std::memory_order_release)
 
+#define METHOD_HANDLE_INVOKE_INTRINSIC(name)                                                      \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame,                                  \
+                               const Instruction* inst,                                           \
+                               uint16_t inst_data,                                                \
+                               JValue* result)                                                    \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                                                       \
+  if (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC) {                                        \
+    return DoInvokePolymorphic<false>(Thread::Current(), *shadow_frame, inst, inst_data, result); \
+  } else {                                                                                        \
+    return DoInvokePolymorphic<true>(Thread::Current(), *shadow_frame, inst, inst_data, result);  \
+  }                                                                                               \
+}
+
+METHOD_HANDLE_INVOKE_INTRINSIC(MethodHandleInvokeExact)
+METHOD_HANDLE_INVOKE_INTRINSIC(MethodHandleInvoke)
+
+#define VAR_HANDLE_ACCESSOR_INTRINSIC(name)                                   \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame,              \
+                               const Instruction* inst,                       \
+                               uint16_t inst_data,                            \
+                               JValue* result)                                \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                                   \
+  return Do##name(Thread::Current(), *shadow_frame, inst, inst_data, result); \
+}
+
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleCompareAndExchange)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleCompareAndExchangeAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleCompareAndExchangeRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleCompareAndSet)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGet);
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndAdd)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndAddAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndAddRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseAnd)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseAndAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseAndRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseOr)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseOrAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseOrRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseXor)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseXorAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndBitwiseXorRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndSet)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndSetAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetAndSetRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetOpaque)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleGetVolatile)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleSet)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleSetOpaque)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleSetRelease)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleSetVolatile)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleWeakCompareAndSet)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleWeakCompareAndSetAcquire)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleWeakCompareAndSetPlain)
+VAR_HANDLE_ACCESSOR_INTRINSIC(VarHandleWeakCompareAndSetRelease)
+
 // Macro to help keep track of what's left to implement.
 #define UNIMPLEMENTED_CASE(name)    \
     case Intrinsics::k##name:       \
@@ -494,6 +551,39 @@
     INTRINSIC_CASE(VarHandleReleaseFence)
     INTRINSIC_CASE(VarHandleLoadLoadFence)
     INTRINSIC_CASE(VarHandleStoreStoreFence)
+    INTRINSIC_CASE(MethodHandleInvokeExact)
+    INTRINSIC_CASE(MethodHandleInvoke)
+    INTRINSIC_CASE(VarHandleCompareAndExchange)
+    INTRINSIC_CASE(VarHandleCompareAndExchangeAcquire)
+    INTRINSIC_CASE(VarHandleCompareAndExchangeRelease)
+    INTRINSIC_CASE(VarHandleCompareAndSet)
+    INTRINSIC_CASE(VarHandleGet)
+    INTRINSIC_CASE(VarHandleGetAcquire)
+    INTRINSIC_CASE(VarHandleGetAndAdd)
+    INTRINSIC_CASE(VarHandleGetAndAddAcquire)
+    INTRINSIC_CASE(VarHandleGetAndAddRelease)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseAnd)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseAndAcquire)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseAndRelease)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseOr)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseOrAcquire)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseOrRelease)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseXor)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseXorAcquire)
+    INTRINSIC_CASE(VarHandleGetAndBitwiseXorRelease)
+    INTRINSIC_CASE(VarHandleGetAndSet)
+    INTRINSIC_CASE(VarHandleGetAndSetAcquire)
+    INTRINSIC_CASE(VarHandleGetAndSetRelease)
+    INTRINSIC_CASE(VarHandleGetOpaque)
+    INTRINSIC_CASE(VarHandleGetVolatile)
+    INTRINSIC_CASE(VarHandleSet)
+    INTRINSIC_CASE(VarHandleSetOpaque)
+    INTRINSIC_CASE(VarHandleSetRelease)
+    INTRINSIC_CASE(VarHandleSetVolatile)
+    INTRINSIC_CASE(VarHandleWeakCompareAndSet)
+    INTRINSIC_CASE(VarHandleWeakCompareAndSetAcquire)
+    INTRINSIC_CASE(VarHandleWeakCompareAndSetPlain)
+    INTRINSIC_CASE(VarHandleWeakCompareAndSetRelease)
     case Intrinsics::kNone:
       res = false;
       break;
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 7a8ae9a..31e7986 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1636,6 +1636,18 @@
   result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
 }
 
+// Checks whether the runtime is 64-bit. This is needed for the clinit of
+// java.lang.invoke.VarHandle. The clinit determines sets of
+// available VarHandle accessors and these differ based on machine
+// word size.
+void UnstartedRuntime::UnstartedJNIVMRuntimeIs64Bit(
+    Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
+    mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args ATTRIBUTE_UNUSED, JValue* result) {
+  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  jboolean is64bit = (pointer_size == PointerSize::k64) ? JNI_TRUE : JNI_FALSE;
+  result->SetZ(is64bit);
+}
+
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
     uint32_t* args, JValue* result) {
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index e7047c7..c029e07 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -80,6 +80,7 @@
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
+  V(VMRuntimeIs64Bit, "boolean dalvik.system.VMRuntime.is64Bit()") \
   V(VMRuntimeNewUnpaddedArray, "java.lang.Object dalvik.system.VMRuntime.newUnpaddedArray(java.lang.Class, int)") \
   V(VMStackGetCallingClassLoader, "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") \
   V(VMStackGetStackClass2, "java.lang.Class dalvik.system.VMStack.getStackClass2()") \
diff --git a/compiler/intrinsics_enum.h b/runtime/intrinsics_enum.h
similarity index 88%
rename from compiler/intrinsics_enum.h
rename to runtime/intrinsics_enum.h
index 5528181..d46d0cc 100644
--- a/compiler/intrinsics_enum.h
+++ b/runtime/intrinsics_enum.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_INTRINSICS_ENUM_H_
-#define ART_COMPILER_INTRINSICS_ENUM_H_
+#ifndef ART_RUNTIME_INTRINSICS_ENUM_H_
+#define ART_RUNTIME_INTRINSICS_ENUM_H_
 
 namespace art {
 
@@ -32,4 +32,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_INTRINSICS_ENUM_H_
+#endif  // ART_RUNTIME_INTRINSICS_ENUM_H_
diff --git a/compiler/intrinsics_list.h b/runtime/intrinsics_list.h
similarity index 71%
rename from compiler/intrinsics_list.h
rename to runtime/intrinsics_list.h
index bfefead..d007728 100644
--- a/compiler/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -14,23 +14,76 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_INTRINSICS_LIST_H_
-#define ART_COMPILER_INTRINSICS_LIST_H_
+#ifndef ART_RUNTIME_INTRINSICS_LIST_H_
+#define ART_RUNTIME_INTRINSICS_LIST_H_
 
-// All intrinsics supported by ART. Format is name, then whether it is expected
-// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an
-// environment, may have side effects, or may throw exceptions.
-
+// This file defines the set of intrinsics that are supported by ART
+// in the compiler and runtime. Neither compiler nor runtime has
+// intrinsics for all methods here.
+//
+// The entries in the INTRINSICS_LIST below have the following format:
+//
+//   1. name
+//   2. invocation-type (art::InvokeType value).
+//   3. needs-environment (art::IntrinsicNeedsEnvironmentOrCache value)
+//   4. side-effects (art::IntrinsicSideEffects value)
+//   5. exception-info (art::IntrinsicExceptions value)
+//   6. declaring class descriptor
+//   7. method name
+//   8. method descriptor
+//
+// The needs-environment, side-effects and exception-info are compiler
+// related properties (compiler/optimizing/nodes.h) that should not be
+// used outside of the compiler.
+//
 // Note: adding a new intrinsic requires an art image version change,
 // as the modifiers flag for some ArtMethods will need to be changed.
+//
+// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an
+// OOME. The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to
+// GVN Integer.valueOf (kNoSideEffects), and it is also OK to remove it if
+// it's unused.
+//
+// Note: Thread.interrupted is marked with kAllSideEffects due to the lack
+// of finer grain side effects representation.
 
-// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an OOME.
-// The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf
-// (kNoSideEffects), and it is also OK to remove it if it's unused.
+// Intrinsics for methods with signature polymorphic behaviours.
+#define SIGNATURE_POLYMORPHIC_INTRINSICS_LIST(V) \
+  V(MethodHandleInvokeExact, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/MethodHandle;", "invokeExact", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(MethodHandleInvoke, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/MethodHandle;", "invoke", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleCompareAndExchange, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "compareAndExchange", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleCompareAndExchangeAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "compareAndExchangeAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleCompareAndExchangeRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "compareAndExchangeRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleCompareAndSet, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "compareAndSet", "([Ljava/lang/Object;)Z") \
+  V(VarHandleGet, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "get", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndAdd, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndAdd", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndAddAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndAddAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndAddRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndAddRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseAnd, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseAnd", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseAndAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseAndAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseAndRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseAndRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseOr, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseOr", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseOrAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseOrAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseOrRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseOrRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseXor, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseXor", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseXorAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseXorAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndBitwiseXorRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndBitwiseXorRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndSet, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndSet", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndSetAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndSetAcquire", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetAndSetRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getAndSetRelease", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetOpaque, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getOpaque", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleGetVolatile, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "getVolatile", "([Ljava/lang/Object;)Ljava/lang/Object;") \
+  V(VarHandleSet, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "set", "([Ljava/lang/Object;)V") \
+  V(VarHandleSetOpaque, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "setOpaque", "([Ljava/lang/Object;)V") \
+  V(VarHandleSetRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "setRelease", "([Ljava/lang/Object;)V") \
+  V(VarHandleSetVolatile, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "setVolatile", "([Ljava/lang/Object;)V") \
+  V(VarHandleWeakCompareAndSet, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "weakCompareAndSet", "([Ljava/lang/Object;)Z") \
+  V(VarHandleWeakCompareAndSetAcquire, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "weakCompareAndSetAcquire", "([Ljava/lang/Object;)Z") \
+  V(VarHandleWeakCompareAndSetPlain, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "weakCompareAndSetPlain", "([Ljava/lang/Object;)Z") \
+  V(VarHandleWeakCompareAndSetRelease, kPolymorphic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/invoke/VarHandle;", "weakCompareAndSetRelease", "([Ljava/lang/Object;)Z")
 
-// Note: Thread.interrupted is marked with kAllSideEffects due to the lack of finer grain
-// side effects representation.
-
+// The complete list of intrinsics.
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
   V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -164,6 +217,7 @@
   V(VarHandleReleaseFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "releaseFence", "()V") \
   V(VarHandleLoadLoadFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "loadLoadFence", "()V") \
   V(VarHandleStoreStoreFence, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "storeStoreFence", "()V") \
+  SIGNATURE_POLYMORPHIC_INTRINSICS_LIST(V)
 
-#endif  // ART_COMPILER_INTRINSICS_LIST_H_
-#undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
+#endif  // ART_RUNTIME_INTRINSICS_LIST_H_
+#undef ART_RUNTIME_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/runtime/invoke_type.h b/runtime/invoke_type.h
index a003f7f..2b877e6 100644
--- a/runtime/invoke_type.h
+++ b/runtime/invoke_type.h
@@ -22,12 +22,13 @@
 namespace art {
 
 enum InvokeType : uint32_t {
-  kStatic,     // <<static>>
-  kDirect,     // <<direct>>
-  kVirtual,    // <<virtual>>
-  kSuper,      // <<super>>
-  kInterface,  // <<interface>>
-  kMaxInvokeType = kInterface
+  kStatic,       // <<static>>
+  kDirect,       // <<direct>>
+  kVirtual,      // <<virtual>>
+  kSuper,        // <<super>>
+  kInterface,    // <<interface>>
+  kPolymorphic,  // <<polymorphic>>
+  kMaxInvokeType = kPolymorphic
 };
 
 std::ostream& operator<<(std::ostream& os, const InvokeType& rhs);
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
index 65f39e4..5a5d571 100644
--- a/runtime/method_handles.cc
+++ b/runtime/method_handles.cc
@@ -355,15 +355,6 @@
                                                                             num_method_params);
 }
 
-inline bool IsMethodHandleInvokeExact(const ArtMethod* const method) {
-  if (method == jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invokeExact)) {
-    return true;
-  } else {
-    DCHECK_EQ(method, jni::DecodeArtMethod(WellKnownClasses::java_lang_invoke_MethodHandle_invoke));
-    return false;
-  }
-}
-
 inline bool IsInvoke(const mirror::MethodHandle::Kind handle_kind) {
   return handle_kind <= mirror::MethodHandle::Kind::kLastInvokeKind;
 }
@@ -416,15 +407,14 @@
 }
 
 template <bool is_range>
-static inline bool DoCallPolymorphic(ArtMethod* called_method,
-                                     Handle<mirror::MethodType> callsite_type,
-                                     Handle<mirror::MethodType> target_type,
-                                     Thread* self,
-                                     ShadowFrame& shadow_frame,
-                                     const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                     uint32_t first_arg,
-                                     JValue* result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
+static inline bool MethodHandleInvokeMethod(ArtMethod* called_method,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> target_type,
+                                            Thread* self,
+                                            ShadowFrame& shadow_frame,
+                                            const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                                            uint32_t first_arg,
+                                            JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
 
@@ -552,15 +542,15 @@
 }
 
 template <bool is_range>
-static inline bool DoCallTransform(ArtMethod* called_method,
-                                   Handle<mirror::MethodType> callsite_type,
-                                   Handle<mirror::MethodType> callee_type,
-                                   Thread* self,
-                                   ShadowFrame& shadow_frame,
-                                   Handle<mirror::MethodHandle> receiver,
-                                   const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                   uint32_t first_arg,
-                                   JValue* result)
+static inline bool MethodHandleInvokeTransform(ArtMethod* called_method,
+                                               Handle<mirror::MethodType> callsite_type,
+                                               Handle<mirror::MethodType> callee_type,
+                                               Thread* self,
+                                               ShadowFrame& shadow_frame,
+                                               Handle<mirror::MethodHandle> receiver,
+                                               const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                                               uint32_t first_arg,
+                                               JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // This can be fixed to two, because the method we're calling here
   // (MethodHandle.transformInternal) doesn't have any locals and the signature
@@ -753,34 +743,34 @@
     Handle<mirror::MethodType> callee_type =
         (handle_kind == mirror::MethodHandle::Kind::kInvokeCallSiteTransform) ? callsite_type
         : handle_type;
-    return DoCallTransform<is_range>(called_method,
-                                     callsite_type,
-                                     callee_type,
-                                     self,
-                                     shadow_frame,
-                                     method_handle /* receiver */,
-                                     args,
-                                     first_arg,
-                                     result);
+    return MethodHandleInvokeTransform<is_range>(called_method,
+                                                 callsite_type,
+                                                 callee_type,
+                                                 self,
+                                                 shadow_frame,
+                                                 method_handle /* receiver */,
+                                                 args,
+                                                 first_arg,
+                                                 result);
   } else {
-    return DoCallPolymorphic<is_range>(called_method,
-                                       callsite_type,
-                                       handle_type,
-                                       self,
-                                       shadow_frame,
-                                       args,
-                                       first_arg,
-                                       result);
+    return MethodHandleInvokeMethod<is_range>(called_method,
+                                              callsite_type,
+                                              handle_type,
+                                              self,
+                                              shadow_frame,
+                                              args,
+                                              first_arg,
+                                              result);
   }
 }
 
 // Helper for getters in invoke-polymorphic.
-inline static void DoFieldGetForInvokePolymorphic(Thread* self,
-                                                  const ShadowFrame& shadow_frame,
-                                                  ObjPtr<mirror::Object>& obj,
-                                                  ArtField* field,
-                                                  Primitive::Type field_type,
-                                                  JValue* result)
+inline static void MethodHandleFieldGet(Thread* self,
+                                        const ShadowFrame& shadow_frame,
+                                        ObjPtr<mirror::Object>& obj,
+                                        ArtField* field,
+                                        Primitive::Type field_type,
+                                        JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   switch (field_type) {
     case Primitive::kPrimBoolean:
@@ -817,12 +807,12 @@
 }
 
 // Helper for setters in invoke-polymorphic.
-inline bool DoFieldPutForInvokePolymorphic(Thread* self,
-                                           ShadowFrame& shadow_frame,
-                                           ObjPtr<mirror::Object>& obj,
-                                           ArtField* field,
-                                           Primitive::Type field_type,
-                                           JValue& value)
+inline bool MethodHandleFieldPut(Thread* self,
+                                 ShadowFrame& shadow_frame,
+                                 ObjPtr<mirror::Object>& obj,
+                                 ArtField* field,
+                                 Primitive::Type field_type,
+                                 JValue& value)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   static const bool kTransaction = false;         // Not in a transaction.
@@ -895,14 +885,13 @@
 }
 
 template <bool is_range, bool do_conversions>
-bool DoInvokePolymorphicFieldAccess(Thread* self,
-                                    ShadowFrame& shadow_frame,
-                                    Handle<mirror::MethodHandle> method_handle,
-                                    Handle<mirror::MethodType> callsite_type,
-                                    const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                    uint32_t first_arg,
-                                    JValue* result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
+bool MethodHandleFieldAccess(Thread* self,
+                             ShadowFrame& shadow_frame,
+                             Handle<mirror::MethodHandle> method_handle,
+                             Handle<mirror::MethodType> callsite_type,
+                             const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                             uint32_t first_arg,
+                             JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<1> hs(self);
   Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
@@ -913,7 +902,7 @@
     case mirror::MethodHandle::kInstanceGet: {
       size_t obj_reg = is_range ? first_arg : args[0];
       ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(obj_reg);
-      DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+      MethodHandleFieldGet(self, shadow_frame, obj, field, field_type, result);
       if (do_conversions && !ConvertReturnValue(callsite_type, handle_type, result)) {
         DCHECK(self->IsExceptionPending());
         return false;
@@ -926,7 +915,7 @@
         DCHECK(self->IsExceptionPending());
         return false;
       }
-      DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+      MethodHandleFieldGet(self, shadow_frame, obj, field, field_type, result);
       if (do_conversions && !ConvertReturnValue(callsite_type, handle_type, result)) {
         DCHECK(self->IsExceptionPending());
         return false;
@@ -951,7 +940,7 @@
         return false;
       }
       ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(obj_reg);
-      return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
+      return MethodHandleFieldPut(self, shadow_frame, obj, field, field_type, value);
     }
     case mirror::MethodHandle::kStaticPut: {
       ObjPtr<mirror::Object> obj = GetAndInitializeDeclaringClass(self, field);
@@ -974,7 +963,7 @@
         DCHECK(self->IsExceptionPending());
         return false;
       }
-      return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
+      return MethodHandleFieldPut(self, shadow_frame, obj, field, field_type, value);
     }
     default:
       LOG(FATAL) << "Unreachable: " << handle_kind;
@@ -983,26 +972,24 @@
 }
 
 template <bool is_range>
-static inline bool DoInvokePolymorphicNonExact(Thread* self,
-                                               ShadowFrame& shadow_frame,
-                                               Handle<mirror::MethodHandle> method_handle,
-                                               Handle<mirror::MethodType> callsite_type,
-                                               const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                               uint32_t first_arg,
-                                               JValue* result)
+static inline bool MethodHandleInvokeInternal(Thread* self,
+                                              ShadowFrame& shadow_frame,
+                                              Handle<mirror::MethodHandle> method_handle,
+                                              Handle<mirror::MethodType> callsite_type,
+                                              const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                                              uint32_t first_arg,
+                                              JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
-  ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
-  CHECK(handle_type != nullptr);
-
   if (IsFieldAccess(handle_kind)) {
+    ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
     DCHECK(!callsite_type->IsExactMatch(handle_type.Ptr()));
     if (!callsite_type->IsConvertible(handle_type.Ptr())) {
       ThrowWrongMethodTypeException(handle_type.Ptr(), callsite_type.Get());
       return false;
     }
     const bool do_convert = true;
-    return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
+    return MethodHandleFieldAccess<is_range, do_convert>(
         self,
         shadow_frame,
         method_handle,
@@ -1011,7 +998,6 @@
         first_arg,
         result);
   }
-
   return DoInvokePolymorphicMethod<is_range>(self,
                                              shadow_frame,
                                              method_handle,
@@ -1022,27 +1008,32 @@
 }
 
 template <bool is_range>
-bool DoInvokePolymorphicExact(Thread* self,
-                              ShadowFrame& shadow_frame,
-                              Handle<mirror::MethodHandle> method_handle,
-                              Handle<mirror::MethodType> callsite_type,
-                              const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                              uint32_t first_arg,
-                              JValue* result)
+static inline bool MethodHandleInvokeExactInternal(
+    Thread* self,
+    ShadowFrame& shadow_frame,
+    Handle<mirror::MethodHandle> method_handle,
+    Handle<mirror::MethodType> callsite_type,
+    const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+    uint32_t first_arg,
+    JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<1> hs(self);
-  const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
   Handle<mirror::MethodType> method_handle_type(hs.NewHandle(method_handle->GetMethodType()));
+  if (!callsite_type->IsExactMatch(method_handle_type.Get())) {
+    ThrowWrongMethodTypeException(method_handle_type.Get(), callsite_type.Get());
+    return false;
+  }
+
+  const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
   if (IsFieldAccess(handle_kind)) {
     const bool do_convert = false;
-    return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
-        self,
-        shadow_frame,
-        method_handle,
-        callsite_type,
-        args,
-        first_arg,
-        result);
+    return MethodHandleFieldAccess<is_range, do_convert>(self,
+                                                         shadow_frame,
+                                                         method_handle,
+                                                         callsite_type,
+                                                         args,
+                                                         first_arg,
+                                                         result);
   }
 
   // Slow-path check.
@@ -1120,77 +1111,77 @@
 }  // namespace
 
 template <bool is_range>
-bool DoInvokePolymorphic(Thread* self,
-                         ArtMethod* invoke_method,
-                         ShadowFrame& shadow_frame,
-                         Handle<mirror::MethodHandle> method_handle,
-                         Handle<mirror::MethodType> callsite_type,
-                         const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                         uint32_t first_arg,
-                         JValue* result)
+bool MethodHandleInvoke(Thread* self,
+                        ShadowFrame& shadow_frame,
+                        Handle<mirror::MethodHandle> method_handle,
+                        Handle<mirror::MethodType> callsite_type,
+                        const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                        uint32_t first_arg,
+                        JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::MethodType> method_handle_type = method_handle->GetMethodType();
-  if (IsMethodHandleInvokeExact(invoke_method)) {
-    // We need to check the nominal type of the handle in addition to the
-    // real type. The "nominal" type is present when MethodHandle.asType is
-    // called any handle, and results in the declared type of the handle
-    // changing.
-    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
-    if (UNLIKELY(nominal_type != nullptr)) {
-      if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
-        ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
-        return false;
-      }
-
-      if (LIKELY(!nominal_type->IsExactMatch(method_handle_type.Ptr()))) {
-        // Different nominal type means we have to treat as non-exact.
-        return DoInvokePolymorphicNonExact<is_range>(self,
+  if (UNLIKELY(callsite_type->IsExactMatch(method_handle->GetMethodType()))) {
+    // A non-exact invoke that can be invoked exactly.
+    return MethodHandleInvokeExactInternal<is_range>(self,
                                                      shadow_frame,
                                                      method_handle,
                                                      callsite_type,
                                                      args,
                                                      first_arg,
                                                      result);
-      }
-    }
-
-    if (!callsite_type->IsExactMatch(method_handle_type.Ptr())) {
-      ThrowWrongMethodTypeException(method_handle_type.Ptr(), callsite_type.Get());
-      return false;
-    }
-    return DoInvokePolymorphicExact<is_range>(self,
-                                              shadow_frame,
-                                              method_handle,
-                                              callsite_type,
-                                              args,
-                                              first_arg,
-                                              result);
   } else {
-    if (UNLIKELY(callsite_type->IsExactMatch(method_handle_type.Ptr()))) {
-      // A non-exact invoke that can be invoked exactly.
-      return DoInvokePolymorphicExact<is_range>(self,
+    return MethodHandleInvokeInternal<is_range>(self,
                                                 shadow_frame,
                                                 method_handle,
                                                 callsite_type,
                                                 args,
                                                 first_arg,
                                                 result);
-    }
-    return DoInvokePolymorphicNonExact<is_range>(self,
-                                                 shadow_frame,
-                                                 method_handle,
-                                                 callsite_type,
-                                                 args,
-                                                 first_arg,
-                                                 result);
   }
 }
 
-#define EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL(_is_range)  \
+template <bool is_range>
+bool MethodHandleInvokeExact(Thread* self,
+                               ShadowFrame& shadow_frame,
+                               Handle<mirror::MethodHandle> method_handle,
+                               Handle<mirror::MethodType> callsite_type,
+                               const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                               uint32_t first_arg,
+                               JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // We need to check the nominal type of the handle in addition to the
+  // real type. The "nominal" type is present when MethodHandle.asType is
+  // called on any handle, and results in the declared type of the handle
+  // changing.
+  ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+  if (UNLIKELY(nominal_type != nullptr)) {
+    if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
+      ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
+      return false;
+    }
+    if (LIKELY(!nominal_type->IsExactMatch(method_handle->GetMethodType()))) {
+      // Different nominal type means we have to treat as non-exact.
+      return MethodHandleInvokeInternal<is_range>(self,
+                                           shadow_frame,
+                                           method_handle,
+                                           callsite_type,
+                                           args,
+                                           first_arg,
+                                           result);
+    }
+  }
+  return MethodHandleInvokeExactInternal<is_range>(self,
+                                                   shadow_frame,
+                                                   method_handle,
+                                                   callsite_type,
+                                                   args,
+                                                   first_arg,
+                                                   result);
+}
+
+#define EXPLICIT_DO_METHOD_HANDLE_METHOD(_name, _is_range)       \
   template REQUIRES_SHARED(Locks::mutator_lock_)                 \
-  bool DoInvokePolymorphic<_is_range>(                           \
+  bool MethodHandle##_name<_is_range>(                           \
       Thread* self,                                              \
-      ArtMethod* invoke_method,                                  \
       ShadowFrame& shadow_frame,                                 \
       Handle<mirror::MethodHandle> method_handle,                \
       Handle<mirror::MethodType> callsite_type,                  \
@@ -1198,8 +1189,10 @@
       uint32_t first_arg,                                        \
       JValue* result)
 
-EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL(true);
-EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL(false);
-#undef EXPLICIT_DO_INVOKE_POLYMORPHIC_TEMPLATE_DECL
+EXPLICIT_DO_METHOD_HANDLE_METHOD(Invoke, true);
+EXPLICIT_DO_METHOD_HANDLE_METHOD(Invoke, false);
+EXPLICIT_DO_METHOD_HANDLE_METHOD(InvokeExact, true);
+EXPLICIT_DO_METHOD_HANDLE_METHOD(InvokeExact, false);
+#undef EXPLICIT_DO_METHOD_HANDLE_METHOD
 
 }  // namespace art
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index 55680f0..8641918 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -202,14 +202,23 @@
 };
 
 template <bool is_range>
-bool DoInvokePolymorphic(Thread* self,
-                         ArtMethod* invoke_method,
-                         ShadowFrame& shadow_frame,
-                         Handle<mirror::MethodHandle> method_handle,
-                         Handle<mirror::MethodType> callsite_type,
-                         const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                         uint32_t first_arg,
-                         JValue* result)
+bool MethodHandleInvoke(Thread* self,
+                        ShadowFrame& shadow_frame,
+                        Handle<mirror::MethodHandle> method_handle,
+                        Handle<mirror::MethodType> callsite_type,
+                        const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                        uint32_t first_arg,
+                        JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+bool MethodHandleInvokeExact(Thread* self,
+                             ShadowFrame& shadow_frame,
+                             Handle<mirror::MethodHandle> method_handle,
+                             Handle<mirror::MethodType> callsite_type,
+                             const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                             uint32_t first_arg,
+                             JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
 }  // namespace art
diff --git a/runtime/mirror/emulated_stack_frame.cc b/runtime/mirror/emulated_stack_frame.cc
index a6129cc..f82bfbf 100644
--- a/runtime/mirror/emulated_stack_frame.cc
+++ b/runtime/mirror/emulated_stack_frame.cc
@@ -289,7 +289,7 @@
   static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-// Explicit DoInvokePolymorphic template function declarations.
+// Explicit CreateFromShadowFrameAndArgs template function declarations.
 #define EXPLICIT_CREATE_FROM_SHADOW_FRAME_AND_ARGS_DECL(_is_range)                         \
   template REQUIRES_SHARED(Locks::mutator_lock_)                                           \
   mirror::EmulatedStackFrame* EmulatedStackFrame::CreateFromShadowFrameAndArgs<_is_range>( \
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4ab8908..4512227 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -23,6 +23,7 @@
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "class_linker.h"
+#include <class_loader_context.h>
 #include "common_throws.h"
 #include "compiler_filter.h"
 #include "dex_file-inl.h"
@@ -459,6 +460,7 @@
                             const char* filename,
                             const char* instruction_set,
                             const char* compiler_filter_name,
+                            const char* class_loader_context,
                             bool profile_changed,
                             bool downgrade) {
   if ((filename == nullptr) || !OS::FileExists(filename)) {
@@ -485,6 +487,19 @@
     return -1;
   }
 
+  std::unique_ptr<ClassLoaderContext> context = nullptr;
+  if (class_loader_context != nullptr) {
+    context = ClassLoaderContext::Create(class_loader_context);
+
+    if (context == nullptr) {
+      ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+      std::string message(StringPrintf("Class loader context '%s' is invalid.",
+                                       class_loader_context));
+      env->ThrowNew(iae.get(), message.c_str());
+      return -1;
+    }
+  }
+
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
 
@@ -495,8 +510,10 @@
     return OatFileAssistant::kNoDexOptNeeded;
   }
 
-  // TODO(calin): Extend DexFile.getDexOptNeeded to accept the class loader context. b/62269291.
-  return oat_file_assistant.GetDexOptNeeded(filter, profile_changed, downgrade);
+  return oat_file_assistant.GetDexOptNeeded(filter,
+                                            profile_changed,
+                                            downgrade,
+                                            context.get());
 }
 
 static jstring DexFile_getDexFileStatus(JNIEnv* env,
@@ -532,6 +549,7 @@
                                     jstring javaFilename,
                                     jstring javaInstructionSet,
                                     jstring javaTargetCompilerFilter,
+                                    jstring javaClassLoaderContext,
                                     jboolean newProfile,
                                     jboolean downgrade) {
   ScopedUtfChars filename(env, javaFilename);
@@ -549,10 +567,16 @@
     return -1;
   }
 
+  NullableScopedUtfChars class_loader_context(env, javaClassLoaderContext);
+  if (env->ExceptionCheck()) {
+    return -1;
+  }
+
   return GetDexOptNeeded(env,
                          filename.c_str(),
                          instruction_set.c_str(),
                          target_compiler_filter.c_str(),
+                         class_loader_context.c_str(),
                          newProfile == JNI_TRUE,
                          downgrade == JNI_TRUE);
 }
@@ -731,7 +755,7 @@
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(DexFile, getDexOptNeeded,
-                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)I"),
+                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
                 "(Ljava/lang/String;"
                 "Ljava/lang/String;"
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index a7fe9b1..fb61228 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -1168,12 +1168,13 @@
 
   const OatFile* file = GetFile();
   if (file == nullptr) {
-    return false;
+    // No oat file means we have nothing to verify.
+    return true;
   }
 
-  size_t dir_index = file->GetLocation().rfind('/');
+  size_t dir_index = oat_file_assistant_->dex_location_.rfind('/');
   std::string classpath_dir = (dir_index != std::string::npos)
-      ? file->GetLocation().substr(0, dir_index)
+      ? oat_file_assistant_->dex_location_.substr(0, dir_index)
       : "";
 
   if (!context->OpenDexFiles(oat_file_assistant_->isa_, classpath_dir)) {
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index d99036d..6d14971 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -1466,6 +1466,33 @@
                   default_filter, false, false, updated_context.get()));
 }
 
+TEST_F(OatFileAssistantTest, GetDexOptNeededWithUpToDateContextRelative) {
+  std::string dex_location = GetScratchDir() + "/TestDex.jar";
+  std::string context_location = GetScratchDir() + "/ContextDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+  Copy(GetDexSrc2(), context_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  const CompilerFilter::Filter default_filter =
+      OatFileAssistant::kDefaultCompilerFilterForDexLoading;
+  std::string error_msg;
+  std::string context_str = "PCL[" + context_location + "]";
+  std::unique_ptr<ClassLoaderContext> context = ClassLoaderContext::Create(context_str);
+  ASSERT_TRUE(context != nullptr);
+  ASSERT_TRUE(context->OpenDexFiles(kRuntimeISA, ""));
+
+  int status = oat_file_assistant.MakeUpToDate(false, context.get(), &error_msg);
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, status) << error_msg;
+
+  // A relative context simulates a dependent split context.
+  std::unique_ptr<ClassLoaderContext> relative_context =
+      ClassLoaderContext::Create("PCL[ContextDex.jar]");
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+            oat_file_assistant.GetDexOptNeeded(
+                default_filter, false, false, relative_context.get()));
+}
+
 // TODO: More Tests:
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index c88799c..ec210d0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -143,6 +143,7 @@
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
 #include "runtime_callbacks.h"
+#include "runtime_intrinsics.h"
 #include "runtime_options.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sigchain.h"
@@ -738,6 +739,11 @@
     InitNativeMethods();
   }
 
+  // InitializeIntrinsics needs to be called after the WellKnownClasses::Init in InitNativeMethods
+  // because in checking the invocation types of intrinsic methods ArtMethod::GetInvokeType()
+  // needs the PolymorphicSignature annotation class which is initialized in WellKnownClasses::Init.
+  InitializeIntrinsics();
+
   // Initialize well known thread group values that may be accessed threads while attaching.
   InitThreadGroups(self);
 
diff --git a/runtime/runtime_intrinsics.cc b/runtime/runtime_intrinsics.cc
new file mode 100644
index 0000000..f710ebe
--- /dev/null
+++ b/runtime/runtime_intrinsics.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "runtime_intrinsics.h"
+
+#include "art_method-inl.h"
+#include "class_linker.h"
+#include "intrinsics_enum.h"
+#include "invoke_type.h"
+#include "mirror/class.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread.h"
+
+namespace art {
+
+namespace {
+
+// Initialize an intrinsic. Returns true if the intrinsic is already
+// initialized, false otherwise.
+bool InitializeIntrinsic(Thread* self,
+                         Intrinsics intrinsic,
+                         InvokeType invoke_type,
+                         const char* class_name,
+                         const char* method_name,
+                         const char* signature)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  PointerSize image_size = class_linker->GetImagePointerSize();
+  ObjPtr<mirror::Class> cls = class_linker->FindSystemClass(self, class_name);
+  if (cls == nullptr) {
+    LOG(FATAL) << "Could not find class of intrinsic " << class_name;
+  }
+
+  ArtMethod* method = cls->FindClassMethod(method_name, signature, image_size);
+  if (method == nullptr || method->GetDeclaringClass() != cls) {
+    LOG(FATAL) << "Could not find method of intrinsic "
+               << class_name << " " << method_name << " " << signature;
+  }
+
+  CHECK_EQ(method->GetInvokeType(), invoke_type);
+  if (method->IsIntrinsic()) {
+    CHECK_EQ(method->GetIntrinsic(), static_cast<uint32_t>(intrinsic));
+    return true;
+  } else {
+    method->SetIntrinsic(static_cast<uint32_t>(intrinsic));
+    return false;
+  }
+}
+
+}  // namespace
+
+void InitializeIntrinsics() {
+  ScopedObjectAccess soa(Thread::Current());
+  // Initialization here uses the short-circuit operator || to stop
+  // initializing if there's an already initialized intrinsic.
+#define SETUP_INTRINSICS(Name, InvokeType, _, __, ___, ClassName, MethodName, Signature) \
+  InitializeIntrinsic(soa.Self(),                                                        \
+                      Intrinsics::k##Name,                                               \
+                      InvokeType,                                                        \
+                      ClassName,                                                         \
+                      MethodName,                                                        \
+                      Signature) ||
+#include "intrinsics_list.h"
+  INTRINSICS_LIST(SETUP_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef SETUP_INTRINSICS
+      true;
+}
+
+}  // namespace art
diff --git a/runtime/runtime_intrinsics.h b/runtime/runtime_intrinsics.h
new file mode 100644
index 0000000..98dc9bc
--- /dev/null
+++ b/runtime/runtime_intrinsics.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_RUNTIME_INTRINSICS_H_
+#define ART_RUNTIME_RUNTIME_INTRINSICS_H_
+
+namespace art {
+
+void InitializeIntrinsics();
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_RUNTIME_INTRINSICS_H_
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 38c893b..6555e14 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -553,7 +553,7 @@
     : self_(self),
       arena_stack_(Runtime::Current()->GetArenaPool()),
       allocator_(&arena_stack_),
-      reg_types_(can_load_classes, allocator_, allow_thread_suspension),
+      reg_types_(can_load_classes, allocator_),
       reg_table_(allocator_),
       work_insn_idx_(dex::kDexNoIndex),
       dex_method_idx_(dex_method_idx),
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 0029eb9..4ebe151 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -268,13 +268,12 @@
   return *reg_type;
 }
 
-RegTypeCache::RegTypeCache(bool can_load_classes, ScopedArenaAllocator& allocator, bool can_suspend)
+RegTypeCache::RegTypeCache(bool can_load_classes, ScopedArenaAllocator& allocator)
     : entries_(allocator.Adapter(kArenaAllocVerifier)),
       klass_entries_(allocator.Adapter(kArenaAllocVerifier)),
       can_load_classes_(can_load_classes),
       allocator_(allocator) {
-  DCHECK(can_suspend || !can_load_classes) << "Cannot load classes is suspension is disabled!";
-  if (kIsDebugBuild && can_suspend) {
+  if (kIsDebugBuild) {
     Thread::Current()->AssertThreadSuspensionIsAllowable(gAborting == 0);
   }
   // The klass_entries_ array does not have primitives or small constants.
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index cb16b15..74d9e9d 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -61,7 +61,7 @@
 
 class RegTypeCache {
  public:
-  RegTypeCache(bool can_load_classes, ScopedArenaAllocator& allocator, bool can_suspend = true);
+  explicit RegTypeCache(bool can_load_classes, ScopedArenaAllocator& allocator);
   ~RegTypeCache();
   static void Init() REQUIRES_SHARED(Locks::mutator_lock_) {
     if (!RegTypeCache::primitive_initialized_) {
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index bfcd95c..829dea9 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -52,6 +52,7 @@
 jclass WellKnownClasses::java_lang_Daemons;
 jclass WellKnownClasses::java_lang_Error;
 jclass WellKnownClasses::java_lang_invoke_MethodHandle;
+jclass WellKnownClasses::java_lang_invoke_MethodHandle_PolymorphicSignature;
 jclass WellKnownClasses::java_lang_IllegalAccessError;
 jclass WellKnownClasses::java_lang_NoClassDefFoundError;
 jclass WellKnownClasses::java_lang_Object;
@@ -298,6 +299,7 @@
   java_lang_Error = CacheClass(env, "java/lang/Error");
   java_lang_IllegalAccessError = CacheClass(env, "java/lang/IllegalAccessError");
   java_lang_invoke_MethodHandle = CacheClass(env, "java/lang/invoke/MethodHandle");
+  java_lang_invoke_MethodHandle_PolymorphicSignature = CacheClass(env, "java/lang/invoke/MethodHandle$PolymorphicSignature");
   java_lang_NoClassDefFoundError = CacheClass(env, "java/lang/NoClassDefFoundError");
   java_lang_reflect_Constructor = CacheClass(env, "java/lang/reflect/Constructor");
   java_lang_reflect_Executable = CacheClass(env, "java/lang/reflect/Executable");
@@ -334,6 +336,7 @@
   java_lang_invoke_MethodHandle_invokeExact = CacheMethod(env, java_lang_invoke_MethodHandle, false, "invokeExact", "([Ljava/lang/Object;)Ljava/lang/Object;");
   java_lang_invoke_MethodHandles_lookup = CacheMethod(env, "java/lang/invoke/MethodHandles", true, "lookup", "()Ljava/lang/invoke/MethodHandles$Lookup;");
   java_lang_invoke_MethodHandles_Lookup_findConstructor = CacheMethod(env, "java/lang/invoke/MethodHandles$Lookup", false, "findConstructor", "(Ljava/lang/Class;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/MethodHandle;");
+
   java_lang_ref_FinalizerReference_add = CacheMethod(env, "java/lang/ref/FinalizerReference", true, "add", "(Ljava/lang/Object;)V");
   java_lang_ref_ReferenceQueue_add = CacheMethod(env, "java/lang/ref/ReferenceQueue", true, "add", "(Ljava/lang/ref/Reference;)V");
 
@@ -434,6 +437,7 @@
   java_lang_Error = nullptr;
   java_lang_IllegalAccessError = nullptr;
   java_lang_invoke_MethodHandle = nullptr;
+  java_lang_invoke_MethodHandle_PolymorphicSignature = nullptr;
   java_lang_NoClassDefFoundError = nullptr;
   java_lang_Object = nullptr;
   java_lang_OutOfMemoryError = nullptr;
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 7deef63..b2fd4d6 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -64,6 +64,7 @@
   static jclass java_lang_Error;
   static jclass java_lang_IllegalAccessError;
   static jclass java_lang_invoke_MethodHandle;
+  static jclass java_lang_invoke_MethodHandle_PolymorphicSignature;
   static jclass java_lang_NoClassDefFoundError;
   static jclass java_lang_Object;
   static jclass java_lang_OutOfMemoryError;
diff --git a/test/667-checker-simd-alignment/expected.txt b/test/667-checker-simd-alignment/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/667-checker-simd-alignment/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/667-checker-simd-alignment/info.txt b/test/667-checker-simd-alignment/info.txt
new file mode 100644
index 0000000..a46bfaa
--- /dev/null
+++ b/test/667-checker-simd-alignment/info.txt
@@ -0,0 +1 @@
+Test SIMD vectorization alignment optimizations.
diff --git a/test/667-checker-simd-alignment/src/Main.java b/test/667-checker-simd-alignment/src/Main.java
new file mode 100644
index 0000000..a6235b8
--- /dev/null
+++ b/test/667-checker-simd-alignment/src/Main.java
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SIMD vectorization alignment optimizations.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.staticallyAligned(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<One>>,<<AddI:i\d+>>]          loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Phi>>,<<Add>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyAligned(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Phi>>,<<One>>]                                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Nrm>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Nrm>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-NOT:                ArrayGet
+  /// CHECK-NOT:                ArraySet
+  static void staticallyAligned(int[] a) {
+    // Starts at offset 12 (hidden) + 1 * 4 relative to base alignment.
+    // So no peeling, aligned vector, no cleanup.
+    for (int i = 1; i < 9; i++) {
+      a[i] += 1;
+    }
+  }
+
+  /// CHECK-START: void Main.staticallyAlignedN(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Par:l\d+>>   NullCheck                            loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<One>>,<<AddI:i\d+>>]          loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Phi>>,<<Add>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyAlignedN(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Par:l\d+>>   NullCheck                            loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Phi>>,<<One>>]                                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Nrm>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Nrm>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiC:i\d+>>  Phi [<<Phi>>,<<AddIC:i\d+>>]         loop:<<Clean:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<NrmC:i\d+>>  Add [<<PhiC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<NrmC>>]          loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddC:i\d+>>  Add [<<Get>>,<<One>>]                loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<NrmC>>,<<AddC>>] loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddIC>>      Add [<<PhiC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  static void staticallyAlignedN(int[] a) {
+    // Starts at offset 12 (hidden) + 1 * 4 relative to base alignment.
+    // So no peeling, aligned vector, cleanup.
+    for (int i = 1; i < a.length; i++) {
+      a[i] += 1;
+    }
+  }
+
+  /// CHECK-START: void Main.staticallyMisaligned(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]         loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Phi>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Phi>>,<<Add>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyMisaligned(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<PhiP:i\d+>>  Phi [<<Zero>>,<<AddIP:i\d+>>]        loop:<<Peel:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<PhiP>>]          loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddP:i\d+>>  Add [<<Get>>,<<One>>]                loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<PhiP>>,<<AddP>>] loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddIP>>      Add [<<PhiP>>,<<One>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<PhiP>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Phi>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Phi>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-NOT:                ArrayGet
+  /// CHECK-NOT:                ArraySet
+  static void staticallyMisaligned(int[] a) {
+    // Starts at offset 12 (hidden) + 0 * 4 relative to base alignment.
+    // Yes, Art runtime misaligns the most common access pattern :-(
+    // Static peeling to the rescue, aligned vector, no cleanup.
+    for (int i = 0; i < 9; i++) {
+      a[i] += 1;
+    }
+  }
+
+  /// CHECK-START: void Main.staticallyMisalignedN(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                       loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Par:l\d+>>   NullCheck                           loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]        loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Phi>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Phi>>,<<Add>>]  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]               loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyMisalignedN(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Par:l\d+>>   NullCheck                            loop:none
+  /// CHECK-DAG: <<PhiP:i\d+>>  Phi [<<Zero>>,<<AddIP:i\d+>>]        loop:<<Peel:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<GetP:i\d+>>  ArrayGet [<<Par>>,<<PhiP>>]          loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddP:i\d+>>  Add [<<GetP>>,<<One>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<PhiP>>,<<AddP>>] loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddIP>>      Add [<<PhiP>>,<<One>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<PhiP>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Phi>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Phi>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiC:i\d+>>  Phi [<<Phi>>,<<AddIC:i\d+>>]         loop:<<Clean:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<GetC:i\d+>>  ArrayGet [<<Par>>,<<PhiC>>]          loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddC:i\d+>>  Add [<<GetC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<PhiC>>,<<AddC>>] loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddIC>>      Add [<<PhiC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  static void staticallyMisalignedN(int[] a) {
+    // Starts at offset 12 (hidden) + 0 * 4 relative to base alignment.
+    // Yes, Art runtime misaligns the most common access pattern :-(
+    // Static peeling to the rescue, aligned vector, cleanup.
+    for (int i = 0; i < a.length; i++) {
+      a[i] += 1;
+    }
+  }
+
+  /// CHECK-START: void Main.staticallyUnknownAligned(int[], int) loop_optimization (before)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Off:i\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]         loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Off>>,<<Phi>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Nrm>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Nrm>>,<<Add>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyUnknownAligned(int[], int) loop_optimization (after)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Off:i\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<PhiP:i\d+>>  Phi [<<Zero>>,<<AddIP:i\d+>>]        loop:<<Peel:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<NrmP:i\d+>>  Add [<<PhiP>>,<<Off>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<NrmP>>]          loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddP:i\d+>>  Add [<<Get>>,<<One>>]                loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<NrmP>>,<<AddP>>] loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddIP>>      Add [<<PhiP>>,<<One>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<PhiP>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Phi>>,<<Off>>]                                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Nrm>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Nrm>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiC:i\d+>>  Phi [<<Phi>>,<<AddIC:i\d+>>]         loop:<<Clean:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<NrmC:i\d+>>  Add [<<PhiC>>,<<Off>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<GetC:i\d+>>  ArrayGet [<<Par>>,<<NrmC>>]          loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddC:i\d+>>  Add [<<GetC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<NrmC>>,<<AddC>>] loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddIC>>      Add [<<PhiC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  static void staticallyUnknownAligned(int[] a, int off) {
+    // The first element touched is a[off], so the start alignment is not known statically.
+    // Checks above expect: dynamic peeling loop, ALIGN(8,0) vector loop, scalar cleanup loop.
+    for (int i = 0; i < 9; i++) {
+      a[off + i] += 1;
+    }
+  }
+
+  /// CHECK-START: void Main.staticallyUnknownAlignedN(int[], int, int) loop_optimization (before)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Off:i\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<Zero>>,<<AddI:i\d+>>]         loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Off>>,<<Phi>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<Nrm>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>   Add [<<Get>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<Nrm>>,<<Add>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<One>>]                loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM: void Main.staticallyUnknownAlignedN(int[], int, int) loop_optimization (after)
+  /// CHECK-DAG: <<Par:l\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Off:i\d+>>   ParameterValue                       loop:none
+  /// CHECK-DAG: <<Zero:i\d+>>  IntConstant 0                        loop:none
+  /// CHECK-DAG: <<One:i\d+>>   IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Vl:i\d+>>    IntConstant 2                        loop:none
+  /// CHECK-DAG: <<PhiP:i\d+>>  Phi [<<Zero>>,<<AddIP:i\d+>>]        loop:<<Peel:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<NrmP:i\d+>>  Add [<<PhiP>>,<<Off>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>   ArrayGet [<<Par>>,<<NrmP>>]          loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddP:i\d+>>  Add [<<Get>>,<<One>>]                loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<NrmP>>,<<AddP>>] loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<AddIP>>      Add [<<PhiP>>,<<One>>]               loop:<<Peel>>      outer_loop:none
+  /// CHECK-DAG: <<Repl:d\d+>>  VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<PhiP>>,<<AddI:i\d+>>]                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Nrm:i\d+>>   Add [<<Phi>>,<<Off>>]                                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [<<Par>>,<<Nrm>>]          alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Repl>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                VecStore [<<Par>>,<<Nrm>>,<<Add>>] alignment:ALIGN(8,0) loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI>>       Add [<<Phi>>,<<Vl>>]                                    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiC:i\d+>>  Phi [<<Phi>>,<<AddIC:i\d+>>]         loop:<<Clean:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<NrmC:i\d+>>  Add [<<PhiC>>,<<Off>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<GetC:i\d+>>  ArrayGet [<<Par>>,<<NrmC>>]          loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddC:i\d+>>  Add [<<GetC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG:                ArraySet [<<Par>>,<<NrmC>>,<<AddC>>] loop:<<Clean>>      outer_loop:none
+  /// CHECK-DAG: <<AddIC>>      Add [<<PhiC>>,<<One>>]               loop:<<Clean>>      outer_loop:none
+  static void staticallyUnknownAlignedN(int[] a, int off, int n) {
+    // Starts at a[off] and runs n iterations; neither value is known statically.
+    // ARM checks above expect: peeling loop, ALIGN(8,0) vector loop, scalar cleanup loop.
+    for (int i = 0; i < n; i++) {
+      a[off + i] += 1;
+    }
+  }
+
+  //
+  // Test drivers.
+  //
+
+  private static void test1() {
+    // After the call, element 0 is expected to be 0 and every later element 1.
+    int[] data = new int[9];
+    staticallyAligned(data);
+    for (int idx = 0; idx < data.length; idx++) {
+      expectEquals(idx > 0 ? 1 : 0, data[idx]);
+    }
+  }
+
+  private static void test2() {
+    // For each array length 0..71: element 0 is expected to be 0, every later element 1.
+    for (int len = 0; len <= 71; len++) {
+      int[] data = new int[len];
+      staticallyAlignedN(data);
+      for (int idx = 0; idx < data.length; idx++) {
+        expectEquals(idx > 0 ? 1 : 0, data[idx]);
+      }
+    }
+  }
+
+  private static void test3() {
+    int[] data = new int[9];
+    staticallyMisaligned(data);
+    for (int value : data) {  // Every element is expected to be 1 after the call.
+      expectEquals(1, value);
+    }
+  }
+
+  private static void test4() {
+    for (int len = 0; len <= 71; len++) {  // Array lengths 0..71 inclusive.
+      int[] data = new int[len];
+      staticallyMisalignedN(data);
+      for (int value : data) {  // Every element is expected to be 1 after the call.
+        expectEquals(1, value);
+      }
+    }
+  }
+
+  private static void test5() {
+    // For each offset 0..8: only the 9 elements starting at the offset are expected to be 1.
+    for (int start = 0; start <= 8; start++) {
+      int[] data = new int[17];
+      staticallyUnknownAligned(data, start);
+      for (int idx = 0; idx < data.length; idx++) {
+        expectEquals((idx >= start && idx < start + 9) ? 1 : 0, data[idx]);
+      }
+    }
+  }
+
+  private static void test6() {
+    // For offsets 0..8 and counts 0..9: only elements [offset, offset + count) should be 1.
+    for (int start = 0; start <= 8; start++) {
+      for (int count = 0; count <= 9; count++) {
+        int[] data = new int[17];
+        staticallyUnknownAlignedN(data, start, count);
+        for (int idx = 0; idx < data.length; idx++) {
+          expectEquals((idx >= start && idx < start + count) ? 1 : 0, data[idx]);
+        }
+      }
+    }
+  }
+
+  public static void main(String[] args) {  // Runs every test driver in numeric order.
+    test1();
+    test2();
+    test3();
+    test4();
+    test5();
+    test6();
+    System.out.println("passed");  // Reached only if no driver threw.
+  }
+  private static void expectEquals(int expected, int result) {  // Throws Error on mismatch.
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);  // Reports both values.
+    }
+  }
+}
diff --git a/test/988-method-trace/gen_srcs.py b/test/988-method-trace/gen_srcs.py
index 8f1082f..225f41b 100755
--- a/test/988-method-trace/gen_srcs.py
+++ b/test/988-method-trace/gen_srcs.py
@@ -28,8 +28,8 @@
 
 from string import Template
 
-# Relative path to art/compiler/intrinsics_list.h
-INTRINSICS_LIST_H = os.path.dirname(os.path.realpath(__file__)) + "/../../compiler/intrinsics_list.h"
+# Relative path to art/runtime/intrinsics_list.h
+INTRINSICS_LIST_H = os.path.dirname(os.path.realpath(__file__)) + "/../../runtime/intrinsics_list.h"
 
 # Macro parameter index to V(). Negative means from the end.
 IDX_STATIC_OR_VIRTUAL = 1
@@ -39,7 +39,8 @@
 
 # Exclude all hidden API.
 KLASS_BLACK_LIST = ['sun.misc.Unsafe', 'libcore.io.Memory', 'java.lang.StringFactory',
-                    'java.lang.invoke.VarHandle' ]  # TODO(b/65872996): Enable when VarHandle is visible.
+                    'java.lang.invoke.MethodHandle', # invokes are tested by 956-method-handles
+                    'java.lang.invoke.VarHandle' ]  # TODO(b/65872996): will be tested separately
 METHOD_BLACK_LIST = [('java.lang.ref.Reference', 'getReferent'),
                      ('java.lang.String', 'getCharsNoCheck'),
                      ('java.lang.System', 'arraycopy')]  # arraycopy has a manual test.
@@ -90,7 +91,7 @@
   }
 
   static void test() {
-    // Call each intrinsic from art/compiler/intrinsics_list.h to make sure they are traced.
+    // Call each intrinsic from art/runtime/intrinsics_list.h to make sure they are traced.
 $test_body
   }
 }
diff --git a/test/988-method-trace/src/art/Test988Intrinsics.java b/test/988-method-trace/src/art/Test988Intrinsics.java
index 099fbf2..3069f1a 100644
--- a/test/988-method-trace/src/art/Test988Intrinsics.java
+++ b/test/988-method-trace/src/art/Test988Intrinsics.java
@@ -44,7 +44,7 @@
   }
 
   static void test() {
-    // Call each intrinsic from art/compiler/intrinsics_list.h to make sure they are traced.
+    // Call each intrinsic from art/runtime/intrinsics_list.h to make sure they are traced.
     java.lang.Double.doubleToRawLongBits(0.0);
     java.lang.Double.doubleToLongBits(0.0);
     java.lang.Double.isInfinite(0.0);
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index e5da385..1b8a8c5 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -30,6 +30,10 @@
 # Let users with Java 7 run ahat (b/28303627)
 LOCAL_JAVA_LANGUAGE_VERSION := 1.7
 
+# Make this available on the classpath of the general-tests tradefed suite.
+# It is used by libcore tests that run there.
+LOCAL_COMPATIBILITY_SUITE := general-tests
+
 include $(BUILD_HOST_JAVA_LIBRARY)
 
 # --- ahat script ----------------