Improve stack walk performance.

Move a few functions from art_method.cc to art_method-inl.h
and introduce new overloads that take already-known partial
results, rather than relying on the compiler to magically
merge the identical but non-trivial code paths. The partial
results are DCHECKed to be correct.
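
For example, in StackVisitor::GetVReg() (see the stack.cc hunk
below), the quick code pointer is now computed once and passed to
both getters, roughly:

  // Before, both getters recomputed the quick code pointer internally:
  //   const VmapTable vmap_table(m->GetVmapTable());
  //   QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
  // After, the partial result is computed once and passed to the new
  // overloads, which DCHECK that it matches what they would compute:
  const void* code_pointer = m->GetQuickOatCodePointer();
  DCHECK(code_pointer != nullptr);
  const VmapTable vmap_table(m->GetVmapTable(code_pointer));
  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);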

Change-Id: I342c3001bbff08a2bbbb9a7b62ae67188ad8cffc
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 2d5c07d..c38a595 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -242,9 +242,11 @@
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
                                         Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  mirror::ArtMethod* referrer =
-      sp[callee_save->GetFrameSizeInBytes() / sizeof(mirror::ArtMethod*)];
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  uint32_t frame_size =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
+  mirror::ArtMethod* referrer = sp[frame_size / sizeof(mirror::ArtMethod*)];
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
   if (LIKELY(field != NULL  && obj != NULL)) {
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 60c5377..11a4b3b 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -48,10 +48,13 @@
   //       stack.
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
   *sp = callee_save;
+  uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
-                                                      callee_save->GetReturnPcOffsetInBytes());
+                                                      return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index b7ca188..39efa58 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -153,6 +153,65 @@
   SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
 }
 
+inline const void* ArtMethod::GetQuickOatEntryPoint() {
+  if (IsPortableCompiled() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
+    return nullptr;
+  }
+  Runtime* runtime = Runtime::Current();
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  // On failure, instead of nullptr we get the quick-generic-jni-trampoline for
+  // native methods (indicating generic JNI dispatch), or the quick-to-interpreter-bridge
+  // (but not the trampoline) for non-native methods.
+  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  if (UNLIKELY(entry_point == GetQuickToInterpreterBridge()) ||
+      UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
+    return nullptr;
+  }
+  return entry_point;
+}
+
+inline const void* ArtMethod::GetQuickOatCodePointer() {
+  return EntryPointToCodePointer(GetQuickOatEntryPoint());
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetMappingTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetVmapTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
 inline void ArtMethod::SetOatNativeGcMapOffset(uint32_t gc_map_offset) {
   DCHECK(!Runtime::Current()->IsStarted());
   SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
@@ -196,6 +255,17 @@
   return result;
 }
 
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
+  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
+  return pc - reinterpret_cast<uintptr_t>(code);
+}
+
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc, const void* quick_entry_point) {
+  DCHECK(quick_entry_point != GetQuickToInterpreterBridge());
+  DCHECK(quick_entry_point == Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this));
+  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
+}
+
 template<VerifyObjectFlags kVerifyFlags>
 inline void ArtMethod::SetNativeMethod(const void* native_method) {
   SetFieldPtr<false, true, kVerifyFlags>(
@@ -233,6 +303,12 @@
   }
 
   const void* code_pointer = EntryPointToCodePointer(entry_point);
+  return GetQuickFrameInfo(code_pointer);
+}
+
+inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
   return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
 }
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 6af4cdb..af544fd 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -165,23 +165,21 @@
   return result;
 }
 
-uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  return pc - reinterpret_cast<uintptr_t>(code);
-}
-
 uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
   if (IsPortableCompiled()) {
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
+    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
     return DexFile::kDexNoIndex;   // Special no mapping case
   }
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(code);
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   // Assume the caller wants a pc-to-dex mapping so check here first.
   typedef MappingTable::PcToDexIterator It;
   for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
@@ -198,14 +196,16 @@
   }
   if (abort_on_failure) {
       LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
+             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
              << ") in " << PrettyMethod(this);
   }
   return DexFile::kDexNoIndex;
 }
 
 uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) {
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
     return 0;   // Special no mapping/pc == 0 case
@@ -214,16 +214,14 @@
   typedef MappingTable::DexToPcIterator It;
   for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   // Now check pc-to-dex mappings.
   typedef MappingTable::PcToDexIterator It2;
   for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
@@ -379,43 +377,5 @@
   RegisterNative(self, GetJniDlsymLookupStub(), false);
 }
 
-const void* ArtMethod::GetOatCodePointer() {
-  if (IsPortableCompiled() || IsNative() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
-    return nullptr;
-  }
-  Runtime* runtime = Runtime::Current();
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
-  // On failure, instead of nullptr we get the quick-to-interpreter-bridge (but not the trampoline).
-  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
-  if (entry_point == GetQuickToInterpreterBridge()) {
-    return nullptr;
-  }
-  return EntryPointToCodePointer(entry_point);
-}
-
-const uint8_t* ArtMethod::GetMappingTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
-const uint8_t* ArtMethod::GetVmapTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index bb43328..1c2954e 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -297,14 +297,20 @@
     return reinterpret_cast<const void*>(code);
   }
 
+  // Actual entry point pointer to compiled oat code or nullptr.
+  const void* GetQuickOatEntryPoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Actual pointer to compiled oat code or nullptr.
-  const void* GetOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetQuickOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
   const uint8_t* GetMappingTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetMappingTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
   const uint8_t* GetVmapTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetVmapTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_));
@@ -328,9 +334,17 @@
   }
 
   QuickMethodFrameInfo GetQuickFrameInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  QuickMethodFrameInfo GetQuickFrameInfo(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t GetReturnPcOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFrameSizeInBytes() - kPointerSize;
+    return GetReturnPcOffsetInBytes(GetFrameSizeInBytes());
+  }
+
+  size_t GetReturnPcOffsetInBytes(uint32_t frame_size_in_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK_EQ(frame_size_in_bytes, GetFrameSizeInBytes());
+    return frame_size_in_bytes - kPointerSize;
   }
 
   size_t GetHandleScopeOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -371,6 +385,8 @@
   bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uintptr_t NativePcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t NativePcOffset(const uintptr_t pc, const void* quick_entry_point)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a native PC to a dex PC.
   uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
diff --git a/runtime/stack.cc b/runtime/stack.cc
index e0189e9..be1fba4 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -139,9 +139,11 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
-    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
@@ -163,9 +165,11 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
-    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
@@ -316,7 +320,7 @@
         }
         size_t frame_size = method->GetFrameSizeInBytes();
         // Compute PC for next stack frame from return PC.
-        size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
+        size_t return_pc_offset = method->GetReturnPcOffsetInBytes(frame_size);
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
         if (UNLIKELY(exit_stubs_installed)) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ebf9078..41cfc58 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2024,10 +2024,14 @@
       size_t num_regs = std::min(map.RegWidth() * 8,
                                  static_cast<size_t>(code_item->registers_size_));
       if (num_regs > 0) {
-        const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
+        Runtime* runtime = Runtime::Current();
+        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m);
+        uintptr_t native_pc_offset = m->NativePcOffset(GetCurrentQuickFramePc(), entry_point);
+        const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
         DCHECK(reg_bitmap != nullptr);
-        const VmapTable vmap_table(m->GetVmapTable());
-        QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo();
+        const void* code_pointer = mirror::ArtMethod::EntryPointToCodePointer(entry_point);
+        const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+        QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
         // For all dex registers in the bitmap
         mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
         DCHECK(cur_quick_frame != nullptr);