Optimize method linking

Added more inlining, removed the IMT array allocation and replaced it
with a stack handle scope. Removed some unnecessary handle scopes.
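
The caller side of this change lives in class_linker.cc and is not part of
this diff. Roughly, the linker now stages the IMT in a
StackHandleScope<kImtSize> on the linking thread instead of allocating a
temporary ObjectArray<ArtMethod>; a simplified sketch (identifiers other
than StackHandleScope, kImtSize and PopulateEmbeddedImtAndVTable are
illustrative):

  // Sketch only; the real linking path in ClassLinker is more involved.
  StackHandleScope<mirror::Class::kImtSize> imt_scope(self);
  // ... the kImtSize slots are filled while virtual/interface methods are
  // linked (see the next paragraph) ...
  // The scope is then handed to the class, which copies the slots straight
  // into its embedded IMT rather than reading them out of a heap array.
  klass->PopulateEmbeddedImtAndVTable(&imt_scope);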

Added logic to base interface method tables on the superclass's table so
that we don't need to reconstruct them for every interface (a large win).
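
In rough terms, instead of recomputing every slot by walking all implemented
interfaces, the new class's table now starts as a copy of its superclass's
IMT (or the IMT-unimplemented runtime method when there is no superclass),
and only slots whose interface methods this class actually (re)implements
are overwritten. Illustrative pseudo-code, not the exact logic in
ClassLinker::LinkInterfaceMethods:

  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
    mirror::ArtMethod* m = (super_class != nullptr)
        ? super_class->GetEmbeddedImTableEntry(i)           // inherit the slot
        : Runtime::Current()->GetImtUnimplementedMethod();  // "empty" marker
    imt_scope.SetReference(i, m);
  }
  // Slots hit by methods this class newly implements are then overwritten
  // with the resolved method (or the IMT conflict method on a collision).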

Facebook launch Dalvik KK MR2:
TotalTime: 3165
TotalTime: 3652
TotalTime: 3143
TotalTime: 3298
TotalTime: 3212
TotalTime: 3211

Facebook launch TOT before:
WaitTime: 3702
WaitTime: 3616
WaitTime: 3616
WaitTime: 3687
WaitTime: 3742
WaitTime: 3767

After optimizations:
WaitTime: 2903
WaitTime: 2953
WaitTime: 2918
WaitTime: 2940
WaitTime: 2879
WaitTime: 2792

LinkInterfaceMethods is no longer one of the hottest methods; the new top entries are:
4.73% art::ClassLinker::LinkVirtualMethods(art::Thread*, art::Handle<art::mirror::Class>)
3.07% art::DexFile::FindClassDef(char const*) const
2.94% art::mirror::Class::FindDeclaredStaticField(art::mirror::DexCache const*, unsigned int)
2.90% art::DexFile::FindStringId(char const*) const

Bug: 18054905
Bug: 16828525

(cherry picked from commit 1fb463e42cf1d67595cff66d19c0f99e3046f4c4)

Change-Id: I27cc70178fd3655fbe5a3178887fcba189d21321
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 12bec89..83e3688 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -49,7 +49,7 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Array, length_));
   }
 
@@ -82,7 +82,7 @@
   // Returns true if the index is valid. If not, throws an ArrayIndexOutOfBoundsException and
   // returns false.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CheckIsValidIndex(int32_t index) ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE bool CheckIsValidIndex(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
   void ThrowArrayStoreException(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index d262fd5..cb6ac4f 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -291,6 +291,13 @@
   return result;
 }
 
+inline bool ArtMethod::IsImtUnimplementedMethod() {
+  bool result = this == Runtime::Current()->GetImtUnimplementedMethod();
+  // Check that if we do think it is phony it looks like the imt unimplemented method.
+  DCHECK(!result || IsRuntimeMethod());
+  return result;
+}
+
 inline uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc) {
   const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
   return pc - reinterpret_cast<uintptr_t>(code);
@@ -309,7 +316,7 @@
 }
 
 inline const DexFile* ArtMethod::GetDexFile() {
-  return GetInterfaceMethodIfProxy()->GetDeclaringClass()->GetDexCache()->GetDexFile();
+  return GetDexCache()->GetDexFile();
 }
 
 inline const char* ArtMethod::GetDeclaringClassDescriptor() {
@@ -441,6 +448,21 @@
   return interface_method;
 }
 
+inline void ArtMethod::SetDexCacheStrings(ObjectArray<String>* new_dex_cache_strings) {
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_),
+                        new_dex_cache_strings);
+}
+
+inline void ArtMethod::SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods) {
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_),
+                        new_dex_cache_methods);
+}
+
+inline void ArtMethod::SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_classes) {
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_),
+                        new_dex_cache_classes);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index b219004..014d1a9 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -91,21 +91,6 @@
   java_lang_reflect_ArtMethod_ = GcRoot<Class>(nullptr);
 }
 
-void ArtMethod::SetDexCacheStrings(ObjectArray<String>* new_dex_cache_strings) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_),
-                        new_dex_cache_strings);
-}
-
-void ArtMethod::SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_),
-                        new_dex_cache_methods);
-}
-
-void ArtMethod::SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_classes) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_),
-                        new_dex_cache_classes);
-}
-
 size_t ArtMethod::NumArgRegisters(const StringPiece& shorty) {
   CHECK_LE(1U, shorty.length());
   uint32_t num_registers = 0;
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 3b92012..92b2c30 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -65,7 +65,7 @@
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_));
   }
 
-  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Not called within a transaction.
@@ -203,7 +203,7 @@
   // Number of 32bit registers that would be required to hold all the arguments
   static size_t NumArgRegisters(const StringPiece& shorty);
 
-  uint32_t GetDexMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDexMethodIndex(uint32_t new_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Not called within a transaction.
@@ -226,11 +226,11 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
-  ArtMethod* GetDexCacheResolvedMethod(uint16_t method_idx)
+  ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method)
+  ALWAYS_INLINE void SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
+  ALWAYS_INLINE void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool HasDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool HasSameDexCacheResolvedMethods(ArtMethod* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -432,6 +432,8 @@
 
   bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool IsImtUnimplementedMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   uintptr_t NativeQuickPcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 #ifdef NDEBUG
   uintptr_t NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point)
@@ -564,9 +566,11 @@
   static GcRoot<Class> java_lang_reflect_ArtMethod_;
 
  private:
-  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<Class>* GetDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<Class>* GetDexCacheResolvedTypes()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   friend struct art::ArtMethodOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(ArtMethod);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index e3295ef..892bf44 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -114,19 +114,19 @@
 inline ArtMethod* Class::GetVirtualMethod(uint32_t i) {
   DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>())
       << PrettyClass(this) << " status=" << GetStatus();
-  return GetVirtualMethods()->Get(i);
+  return GetVirtualMethods()->GetWithoutChecks(i);
 }
 
 inline ArtMethod* Class::GetVirtualMethodDuringLinking(uint32_t i) {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetVirtualMethods()->Get(i);
+  return GetVirtualMethods()->GetWithoutChecks(i);
 }
 
 inline void Class::SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* virtual_methods =
       GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
-  virtual_methods->Set<false>(i, f);
+  virtual_methods->SetWithoutChecks<false>(i, f);
 }
 
 inline ObjectArray<ArtMethod>* Class::GetVTable() {
@@ -143,14 +143,6 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetImTable() {
-  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_));
-}
-
-inline void Class::SetImTable(ObjectArray<ArtMethod>* new_imtable) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), new_imtable);
-}
-
 inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i) {
   uint32_t offset = EmbeddedImTableOffset().Uint32Value() + i * sizeof(ImTableEntry);
   return GetFieldObject<mirror::ArtMethod>(MemberOffset(offset));
@@ -159,7 +151,6 @@
 inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method) {
   uint32_t offset = EmbeddedImTableOffset().Uint32Value() + i * sizeof(ImTableEntry);
   SetFieldObject<false>(MemberOffset(offset), method);
-  CHECK(method == GetImTable()->Get(i));
 }
 
 inline bool Class::HasVTable() {
@@ -761,6 +752,24 @@
   }
 }
 
+inline uint32_t Class::NumDirectInterfaces() {
+  if (IsPrimitive()) {
+    return 0;
+  } else if (IsArrayClass()) {
+    return 2;
+  } else if (IsProxyClass()) {
+    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfaces();
+    return interfaces != nullptr ? interfaces->GetLength() : 0;
+  } else {
+    const DexFile::TypeList* interfaces = GetInterfaceTypeList();
+    if (interfaces == nullptr) {
+      return 0;
+    } else {
+      return interfaces->Size();
+    }
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 6df7204..8eafd6f 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -738,24 +738,6 @@
   return &GetDexFile().GetClassDef(class_def_idx);
 }
 
-uint32_t Class::NumDirectInterfaces() {
-  if (IsPrimitive()) {
-    return 0;
-  } else if (IsArrayClass()) {
-    return 2;
-  } else if (IsProxyClass()) {
-    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfaces();
-    return interfaces != nullptr ? interfaces->GetLength() : 0;
-  } else {
-    const DexFile::TypeList* interfaces = GetInterfaceTypeList();
-    if (interfaces == nullptr) {
-      return 0;
-    } else {
-      return interfaces->Size();
-    }
-  }
-}
-
 uint16_t Class::GetDirectInterfaceTypeIdx(uint32_t idx) {
   DCHECK(!IsPrimitive());
   DCHECK(!IsArrayClass());
@@ -817,22 +799,21 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtMethod>* table = GetImTable();
-  if (table != nullptr) {
-    for (uint32_t i = 0; i < kImtSize; i++) {
-      SetEmbeddedImTableEntry(i, table->Get(i));
-    }
+void Class::PopulateEmbeddedImtAndVTable(StackHandleScope<kImtSize>* imt_handle_scope) {
+  for (uint32_t i = 0; i < kImtSize; i++) {
+    // Replace null with conflict.
+    mirror::Object* obj = imt_handle_scope->GetReference(i);
+    DCHECK(obj != nullptr);
+    SetEmbeddedImTableEntry(i, obj->AsArtMethod());
   }
 
-  table = GetVTableDuringLinking();
+  ObjectArray<ArtMethod>* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   SetEmbeddedVTableLength(table->GetLength());
   for (int32_t i = 0; i < table->GetLength(); i++) {
-    SetEmbeddedVTableEntry(i, table->Get(i));
+    SetEmbeddedVTableEntry(i, table->GetWithoutChecks(i));
   }
 
-  SetImTable(nullptr);
   // Keep java.lang.Object class's vtable around for since it's easier
   // to be reused by array classes during their linking.
   if (!IsObjectClass()) {
@@ -844,9 +825,10 @@
 class CopyClassVisitor {
  public:
   explicit CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig,
-                            size_t new_length, size_t copy_bytes)
+                            size_t new_length, size_t copy_bytes,
+                            StackHandleScope<mirror::Class::kImtSize>* imt_handle_scope)
       : self_(self), orig_(orig), new_length_(new_length),
-        copy_bytes_(copy_bytes) {
+        copy_bytes_(copy_bytes), imt_handle_scope_(imt_handle_scope) {
   }
 
   void operator()(Object* obj, size_t usable_size) const
@@ -855,7 +837,7 @@
     mirror::Class* new_class_obj = obj->AsClass();
     mirror::Object::CopyObject(self_, new_class_obj, orig_->Get(), copy_bytes_);
     new_class_obj->SetStatus(Class::kStatusResolving, self_);
-    new_class_obj->PopulateEmbeddedImtAndVTable();
+    new_class_obj->PopulateEmbeddedImtAndVTable(imt_handle_scope_);
     new_class_obj->SetClassSize(new_length_);
   }
 
@@ -864,10 +846,12 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
+  StackHandleScope<mirror::Class::kImtSize>* const imt_handle_scope_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
-Class* Class::CopyOf(Thread* self, int32_t new_length) {
+Class* Class::CopyOf(Thread* self, int32_t new_length,
+                     StackHandleScope<kImtSize>* imt_handle_scope) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
@@ -875,17 +859,15 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // The num_bytes (3rd param) is sizeof(Class) as opposed to SizeOf()
   // to skip copying the tail part that we will overwrite here.
-  CopyClassVisitor visitor(self, &h_this, new_length, sizeof(Class));
-
+  CopyClassVisitor visitor(self, &h_this, new_length, sizeof(Class), imt_handle_scope);
   mirror::Object* new_class =
       kMovingClasses
          ? heap->AllocObject<true>(self, java_lang_Class_.Read(), new_length, visitor)
          : heap->AllocNonMovableObject<true>(self, java_lang_Class_.Read(), new_length, visitor);
   if (UNLIKELY(new_class == nullptr)) {
     CHECK(self->IsExceptionPending());  // Expect an OOME.
-    return NULL;
+    return nullptr;
   }
-
   return new_class->AsClass();
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index c06071b..6a7faaa 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -36,6 +36,7 @@
 template<class T> class Handle;
 class Signature;
 class StringPiece;
+template<size_t kNumReferences> class PACKED(4) StackHandleScope;
 
 namespace mirror {
 
@@ -196,46 +197,46 @@
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the class is an interface.
-  bool IsInterface() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsInterface() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccInterface) != 0;
   }
 
   // Returns true if the class is declared public.
-  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the class is declared final.
-  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsFinalizable) != 0;
   }
 
-  void SetFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE void SetFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
     SetAccessFlags(flags | kAccClassIsFinalizable);
   }
 
   // Returns true if the class is abstract.
-  bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
   // Returns true if the class is an annotation.
-  bool IsAnnotation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsAnnotation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAnnotation) != 0;
   }
 
   // Returns true if the class is synthetic.
-  bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
@@ -592,7 +593,7 @@
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  inline bool IsAssignableFrom(Class* src) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ALWAYS_INLINE bool IsAssignableFrom(Class* src) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(src != NULL);
     if (this == src) {
       // Can always assign to things of the same type.
@@ -609,7 +610,7 @@
     }
   }
 
-  Class* GetSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE Class* GetSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSuperClass(Class *new_super_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
@@ -648,12 +649,13 @@
 
   void SetDexCache(DexCache* new_dex_cache) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetDirectMethods()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetDirectMethod(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ArtMethod* GetDirectMethod(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -662,13 +664,14 @@
   uint32_t NumDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ObjectArray<ArtMethod>* GetVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetVirtualMethods()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods)
+  ALWAYS_INLINE void SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of non-inherited virtual methods.
-  uint32_t NumVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE uint32_t NumVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* GetVirtualMethod(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -678,9 +681,10 @@
   void SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTableDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetVTableDuringLinking()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVTable(ObjectArray<ArtMethod>* new_vtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -689,13 +693,6 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, vtable_);
   }
 
-  void SetImTable(ObjectArray<ArtMethod>* new_imtable)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  static MemberOffset ImTableOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(Class, imtable_);
-  }
-
   static MemberOffset EmbeddedImTableOffset() {
     return MemberOffset(sizeof(Class));
   }
@@ -705,7 +702,7 @@
   }
 
   static MemberOffset EmbeddedVTableOffset() {
-    return MemberOffset(sizeof(Class) + kImtSize * sizeof(mirror::Class::ImTableEntry) + sizeof(int32_t));
+    return MemberOffset(sizeof(Class) + kImtSize * sizeof(ImTableEntry) + sizeof(int32_t));
   }
 
   bool ShouldHaveEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -730,7 +727,8 @@
 
   void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PopulateEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PopulateEmbeddedImtAndVTable(StackHandleScope<kImtSize>* imt_handle_scope)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
@@ -798,11 +796,11 @@
 
   ArtMethod* FindClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetIfTableCount() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE int32_t GetIfTableCount() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  IfTable* GetIfTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE IfTable* GetIfTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetIfTable(IfTable* new_iftable) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetIfTable(IfTable* new_iftable) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get instance fields of the class (See also GetSFields).
   ObjectArray<ArtField>* GetIFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -970,7 +968,7 @@
 
   const DexFile::ClassDef* GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -990,7 +988,7 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length)
+  Class* CopyOf(Thread* self, int32_t new_length, StackHandleScope<kImtSize>* imt_handle_scope)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // For proxy class only.
@@ -1039,8 +1037,6 @@
 
   void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetImTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // defining class loader, or NULL for the "bootstrap" system loader
   HeapReference<ClassLoader> class_loader_;
 
diff --git a/runtime/mirror/iftable-inl.h b/runtime/mirror/iftable-inl.h
index 3f20bf4..d1309d2 100644
--- a/runtime/mirror/iftable-inl.h
+++ b/runtime/mirror/iftable-inl.h
@@ -27,7 +27,7 @@
   DCHECK(interface->IsInterface());
   const size_t idx = i * kMax + kInterface;
   DCHECK_EQ(Get(idx), static_cast<Object*>(nullptr));
-  Set<false>(idx, interface);
+  SetWithoutChecks<false>(idx, interface);
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index 5feb602..4d899d2 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -25,13 +25,14 @@
 
 class MANAGED IfTable FINAL : public ObjectArray<Object> {
  public:
-  Class* GetInterface(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Class* interface = Get((i * kMax) + kInterface)->AsClass();
+  ALWAYS_INLINE Class* GetInterface(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Class* interface = GetWithoutChecks((i * kMax) + kInterface)->AsClass();
     DCHECK(interface != NULL);
     return interface;
   }
 
-  void SetInterface(int32_t i, Class* interface) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetInterface(int32_t i, Class* interface)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ObjectArray<ArtMethod>* GetMethodArray(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ObjectArray<ArtMethod>* method_array =
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 7012b19..6404faf 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -45,11 +45,11 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CheckAssignable(T* object) NO_THREAD_SAFETY_ANALYSIS;
 
-  void Set(int32_t i, T* object) ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE void Set(int32_t i, T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // TODO fix thread safety analysis: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void Set(int32_t i, T* object) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
+  ALWAYS_INLINE void Set(int32_t i, T* object) NO_THREAD_SAFETY_ANALYSIS;
 
   // Set element without bound and element type checks, to be used in limited
   // circumstances, such as during boot image writing.
@@ -57,15 +57,15 @@
   // SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetWithoutChecks(int32_t i, T* object) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
+  ALWAYS_INLINE void SetWithoutChecks(int32_t i, T* object) NO_THREAD_SAFETY_ANALYSIS;
   // TODO fix thread safety analysis broken by the use of template. This should be
   // SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetWithoutChecksAndWriteBarrier(int32_t i, T* object) ALWAYS_INLINE
+  ALWAYS_INLINE void SetWithoutChecksAndWriteBarrier(int32_t i, T* object)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  T* GetWithoutChecks(int32_t i) ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ALWAYS_INLINE T* GetWithoutChecks(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Copy src into this array (dealing with overlaps as memmove does) without assignability checks.
   void AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,