Improve performance of invokevirtual/invokeinterface with embedded imt/vtable

Add embedded versions of the IMT and vtable to the class object. Both tables
start at a fixed offset within the class object, so the target method/entry
point can be loaded directly from the class object for
invokeinterface/invokevirtual.

Bug: 8142917
Change-Id: I4240d58cfbe9250107c95c0708c036854c455968
diff --git a/compiler/compiled_class.h b/compiler/compiled_class.h
new file mode 100644
index 0000000..b88d613
--- /dev/null
+++ b/compiler/compiled_class.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_COMPILED_CLASS_H_
+#define ART_COMPILER_COMPILED_CLASS_H_
+
+#include "mirror/class.h"
+
+namespace art {
+
+class CompiledClass {
+ public:
+  explicit CompiledClass(mirror::Class::Status status) : status_(status) {}
+  ~CompiledClass() {}
+  mirror::Class::Status GetStatus() const {
+    return status_;
+  }
+ private:
+  const mirror::Class::Status status_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_COMPILED_CLASS_H_
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index f940b54..bac1f12 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -15,6 +15,7 @@
  */
 
 #include "compilers.h"
+
 #include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
 #include "elf_writer_quick.h"
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 56986b4..3a30430 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -22,6 +22,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
 #include "mirror/array.h"
+#include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string.h"
 #include "mir_to_lir-inl.h"
@@ -666,25 +667,23 @@
     }
     case 1:  // Is "this" null? [use kArg1]
       cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
-      // get this->klass_ [use kArg1, set kInvokeTgt]
+      // get this->klass_ [use kArg1, set kArg0]
       cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
+                      cg->TargetRefReg(kArg0),
                       kNotVolatile);
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
-    case 2:  // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
-                      kNotVolatile);
+    case 2: {
+      // Get this->klass_->embedded_vtable[method_idx] [use kArg0, set kArg0]
+      int32_t offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
+          method_idx * sizeof(mirror::Class::VTableEntry);
+      // Load target method from embedded vtable to kArg0 [use kArg0, set kArg0]
+      cg->LoadRefDisp(cg->TargetRefReg(kArg0), offset, cg->TargetRefReg(kArg0), kNotVolatile);
       break;
-    case 3:  // Get target method [use kInvokeTgt, set kArg0]
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
-                      ObjArray::OffsetOfElement(method_idx).Int32Value(),
-                      cg->TargetRefReg(kArg0),
-                      kNotVolatile);
-      break;
-    case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
+    }
+    case 3:
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
+        // Get the compiled code address [use kArg0, set kInvokeTgt]
         cg->LoadWordDisp(cg->TargetRefReg(kArg0),
                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetPtrReg(kInvokeTgt));
@@ -724,27 +723,24 @@
     }
     case 2:  // Is "this" null? [use kArg1]
       cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
-      // Get this->klass_ [use kArg1, set kInvokeTgt]
+      // Get this->klass_ [use kArg1, set kArg0]
       cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
+                      cg->TargetRefReg(kArg0),
                       kNotVolatile);
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
-    case 3:  // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt]
-      // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
-                      cg->TargetPtrReg(kInvokeTgt),
+    case 3: {  // Get target method [use kArg0, set kArg0]
+      int32_t offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
+          (method_idx % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+      // Load target method from embedded imtable to kArg0 [use kArg0, set kArg0]
+      cg->LoadRefDisp(cg->TargetRefReg(kArg0), offset,
+                      cg->TargetRefReg(kArg0),
                       kNotVolatile);
       break;
-    case 4:  // Get target method [use kInvokeTgt, set kArg0]
-      // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
-                       ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(),
-                       cg->TargetRefReg(kArg0),
-                       kNotVolatile);
-      break;
-    case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
+    }
+    case 4:
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
+        // Get the compiled code address [use kArg0, set kInvokeTgt]
         cg->LoadWordDisp(cg->TargetRefReg(kArg0),
                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetPtrReg(kInvokeTgt));
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4ecc5d8..1c63da4 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -19,6 +19,7 @@
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "mirror/art_method.h"
 #include "mirror/array.h"
 #include "x86_lir.h"
 
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 324f717..99fcc26 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_
 
 #include "compiler_driver.h"
+
 #include "dex/compiler_ir.h"
 #include "mirror/art_field.h"
 #include "mirror/art_field-inl.h"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9bf5135..4b4d0d0 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -25,6 +25,7 @@
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
+#include "compiled_class.h"
 #include "compiler.h"
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9903421..ae709f8 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -24,7 +24,6 @@
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "class_reference.h"
-#include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "dex_file.h"
@@ -32,6 +31,7 @@
 #include "instruction_set.h"
 #include "invoke_type.h"
 #include "method_reference.h"
+#include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
 #include "profiler.h"
 #include "runtime.h"
@@ -46,6 +46,7 @@
 class MethodVerifier;
 }  // namespace verifier
 
+class CompiledClass;
 class CompilerOptions;
 class DexCompilationUnit;
 class DexFileToMethodInlinerMap;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index acfa607..38b4100 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -630,11 +630,33 @@
         mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent()));
   }
 
- private:
+ protected:
   ImageWriter* const image_writer_;
   mirror::Object* const copy_;
 };
 
+class FixupClassVisitor FINAL : public FixupVisitor {
+ public:
+  FixupClassVisitor(ImageWriter* image_writer, Object* copy) : FixupVisitor(image_writer, copy) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    DCHECK(obj->IsClass());
+    FixupVisitor::operator()(obj, offset, false);
+
+    if (offset.Uint32Value() < mirror::Class::EmbeddedVTableOffset().Uint32Value()) {
+      return;
+    }
+  }
+
+  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    LOG(FATAL) << "Reference not expected here.";
+  }
+};
+
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
@@ -646,13 +668,68 @@
       DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
     }
   }
-  FixupVisitor visitor(this, copy);
-  orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  if (orig->IsClass() && orig->AsClass()->ShouldHaveEmbeddedImtAndVTable()) {
+    FixupClassVisitor visitor(this, copy);
+    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  } else {
+    FixupVisitor visitor(this, copy);
+    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  }
   if (orig->IsArtMethod<kVerifyNone>()) {
     FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
   }
 }
 
+const byte* ImageWriter::GetQuickCode(mirror::ArtMethod* method, bool* quick_is_interpreted) {
+  DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() &&
+         !method->IsAbstract()) << PrettyMethod(method);
+
+  // Use original code if it exists. Otherwise, set the code pointer to the resolution
+  // trampoline.
+
+  // Quick entrypoint:
+  const byte* quick_code = GetOatAddress(method->GetQuickOatCodeOffset());
+  *quick_is_interpreted = false;
+  if (quick_code != nullptr &&
+      (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) {
+    // We have code for a non-static or initialized method, just use the code.
+  } else if (quick_code == nullptr && method->IsNative() &&
+      (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
+    // Non-static or initialized native method missing compiled code, use generic JNI version.
+    quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
+  } else if (quick_code == nullptr && !method->IsNative()) {
+    // We don't have code at all for a non-native method, use the interpreter.
+    quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+    *quick_is_interpreted = true;
+  } else {
+    CHECK(!method->GetDeclaringClass()->IsInitialized());
+    // We have code for a static method, but need to go through the resolution stub for class
+    // initialization.
+    quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
+  }
+  return quick_code;
+}
+
+const byte* ImageWriter::GetQuickEntryPoint(mirror::ArtMethod* method) {
+  // Calculate the quick entry point following the same logic as FixupMethod() below.
+  // The resolution method has a special trampoline to call.
+  if (UNLIKELY(method == Runtime::Current()->GetResolutionMethod())) {
+    return GetOatAddress(quick_resolution_trampoline_offset_);
+  } else if (UNLIKELY(method == Runtime::Current()->GetImtConflictMethod())) {
+    return GetOatAddress(quick_imt_conflict_trampoline_offset_);
+  } else {
+    // We assume all methods have code. If they don't currently then we set them to use the
+    // resolution trampoline. Abstract methods never have code and so we need to make sure their
+    // use results in an AbstractMethodError. We use the interpreter to achieve this.
+    if (UNLIKELY(method->IsAbstract())) {
+      return GetOatAddress(quick_to_interpreter_bridge_offset_);
+    } else {
+      bool quick_is_interpreted;
+      return GetQuickCode(method, &quick_is_interpreted);
+    }
+  }
+}
+
 void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -674,29 +751,8 @@
       copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
           (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
-      // Use original code if it exists. Otherwise, set the code pointer to the resolution
-      // trampoline.
-
-      // Quick entrypoint:
-      const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
-      bool quick_is_interpreted = false;
-      if (quick_code != nullptr &&
-          (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
-        // We have code for a non-static or initialized method, just use the code.
-      } else if (quick_code == nullptr && orig->IsNative() &&
-          (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
-        // Non-static or initialized native method missing compiled code, use generic JNI version.
-        quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
-      } else if (quick_code == nullptr && !orig->IsNative()) {
-        // We don't have code at all for a non-native method, use the interpreter.
-        quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
-        quick_is_interpreted = true;
-      } else {
-        CHECK(!orig->GetDeclaringClass()->IsInitialized());
-        // We have code for a static method, but need to go through the resolution stub for class
-        // initialization.
-        quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
-      }
+      bool quick_is_interpreted;
+      const byte* quick_code = GetQuickCode(orig, &quick_is_interpreted);
       copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
 
       // Portable entrypoint:
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 2bcb41e..cf5bc93 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -149,6 +149,13 @@
   void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Get quick code for non-resolution/imt_conflict/abstract method.
+  const byte* GetQuickCode(mirror::ArtMethod* method, bool* quick_is_interpreted)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const byte* GetQuickEntryPoint(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Patches references in OatFile to expect runtime addresses.
   void PatchOatCodeAndMethods(File* elf_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -192,6 +199,7 @@
   uint32_t quick_to_interpreter_bridge_offset_;
 
   friend class FixupVisitor;
+  friend class FixupClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
 
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 3bbb723..dec84f1 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -28,6 +28,7 @@
 #include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_internal.h"
+#include "mirror/art_method.h"
 #include "utils/assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4b6d501..a21351b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -22,6 +22,7 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
+#include "compiled_class.h"
 #include "dex_file-inl.h"
 #include "dex/verification_results.h"
 #include "gc/space/space.h"