Allow late lookup for @FastNative methods.

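Avoid failing the "called as Native" state assertion in
artFindNativeMethod(): @FastNative methods are still Runnable when they
reach art_jni_dlsym_lookup_stub, so the stub now checks the method's
access flags and calls the new artFindNativeMethodRunnable() for them.
The GenericJNI trampoline no longer performs the lookup itself and
leaves it to the stub.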

Test: Enable tests in 178-app-image-native-method
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 112189621
Change-Id: I3eb5c9fd239743732866e8ea0863a84bf85f7b20
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index 3720c96..a0f93cc 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -20,6 +20,7 @@
      * Jni dlsym lookup stub.
      */
     .extern artFindNativeMethod
+    .extern artFindNativeMethodRunnable
 ENTRY art_jni_dlsym_lookup_stub
     push   {r0, r1, r2, r3, lr}           @ spill regs
     .cfi_adjust_cfa_offset 20
@@ -30,9 +31,22 @@
     .cfi_rel_offset lr, 16
     sub    sp, #12                        @ pad stack pointer to align frame
     .cfi_adjust_cfa_offset 12
+
     mov    r0, rSELF                      @ pass Thread::Current()
+    // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative.
+    ldr    ip, [r0, #THREAD_TOP_QUICK_FRAME_OFFSET]   // uintptr_t tagged_quick_frame
+    bic    ip, #1                                     // ArtMethod** sp
+    ldr    ip, [ip]                                   // ArtMethod* method
+    ldr    ip, [ip, #ART_METHOD_ACCESS_FLAGS_OFFSET]  // uint32_t access_flags
+    tst    ip, #ACCESS_FLAGS_METHOD_IS_FAST_NATIVE
+    bne    .Llookup_stub_fast_native
     blx    artFindNativeMethod
+    b      .Llookup_stub_continue
+.Llookup_stub_fast_native:
+    blx    artFindNativeMethodRunnable
+.Llookup_stub_continue:
     mov    r12, r0                        @ save result in r12
+
     add    sp, #12                        @ restore stack pointer
     .cfi_adjust_cfa_offset -12
     cbz    r0, 1f                         @ is method code null?
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 6b9393a..bad37bc 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1725,14 +1725,11 @@
     mov r1, r10
     blx artQuickGenericJniTrampoline  // (Thread*, sp)
 
-    // The C call will have registered the complete save-frame on success.
+    // The C call will have registered the complete save-frame.
     // The result of the call is:
-    // r0: pointer to native code, 0 on error.
+    // r0: pointer to native code.
     // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error = 0.
-    cbz r0, .Lexception_in_native
-
     // Release part of the alloca.
     mov sp, r1
 
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index d30738a..e645799 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -20,6 +20,7 @@
      * Jni dlsym lookup stub.
      */
     .extern artFindNativeMethod
+    .extern artFindNativeMethodRunnable
 
 ENTRY art_jni_dlsym_lookup_stub
   // spill regs.
@@ -46,7 +47,18 @@
   .cfi_adjust_cfa_offset 16
 
   mov x0, xSELF   // pass Thread::Current()
-  bl  artFindNativeMethod
+  // Normal native (thread is Native): artFindNativeMethod(); @FastNative (still Runnable): ...Runnable().
+  ldr   xIP0, [x0, #THREAD_TOP_QUICK_FRAME_OFFSET]      // uintptr_t tagged_quick_frame
+  bic   xIP0, xIP0, #1                                  // clear tag bit 0 -> ArtMethod** sp
+  ldr   xIP0, [xIP0]                                    // ArtMethod* method
+  ldr   wIP0, [xIP0, #ART_METHOD_ACCESS_FLAGS_OFFSET]   // uint32_t access_flags: 32-bit load only
+  tst   wIP0, #ACCESS_FLAGS_METHOD_IS_FAST_NATIVE
+  b.ne  .Llookup_stub_fast_native
+  bl    artFindNativeMethod
+  b     .Llookup_stub_continue
+.Llookup_stub_fast_native:
+  bl    artFindNativeMethodRunnable
+.Llookup_stub_continue:
   mov  x17, x0    // store result in scratch reg.
 
   // load spill regs.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 5665e18..6e9b533 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2150,14 +2150,11 @@
     mov x1, xFP
     bl artQuickGenericJniTrampoline  // (Thread*, sp)
 
-    // The C call will have registered the complete save-frame on success.
+    // The C call will have registered the complete save-frame.
     // The result of the call is:
-    // x0: pointer to native code, 0 on error.
+    // x0: pointer to native code.
     // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error = 0.
-    cbz x0, .Lexception_in_native
-
     // Release part of the alloca.
     mov sp, x1
 
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index aca5a37..4862e99 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -24,7 +24,18 @@
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
+    // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative.
+    movl (%esp), %eax                                // Thread* self
+    movl THREAD_TOP_QUICK_FRAME_OFFSET(%eax), %eax   // uintptr_t tagged_quick_frame
+    andl LITERAL(0xfffffffe), %eax                   // ArtMethod** sp
+    movl (%eax), %eax                                // ArtMethod* method
+    testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%eax)
+    jne .Llookup_stub_fast_native
     call SYMBOL(artFindNativeMethod)  // (Thread*)
+    jmp .Llookup_stub_continue
+.Llookup_stub_fast_native:
+    call SYMBOL(artFindNativeMethodRunnable)  // (Thread*)
+.Llookup_stub_continue:
     addl LITERAL(12), %esp        // remove argument & padding
     CFI_ADJUST_CFA_OFFSET(-12)
     testl %eax, %eax              // check if returned method code is null
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 61d0aad..2bf82d0 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1928,15 +1928,11 @@
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
 
-    // The C call will have registered the complete save-frame on success.
+    // The C call will have registered the complete save-frame.
     // The result of the call is:
-    // eax: pointer to native code, 0 on error.
+    // eax: pointer to native code.
     // edx: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error = 0.
-    test %eax, %eax
-    jz .Lexception_in_native
-
     // Release part of the alloca.
     movl %edx, %esp
 
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index f6736df..3860c37 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -45,8 +45,17 @@
     movq %xmm15, 88(%rsp)
     // prepare call
     movq %gs:THREAD_SELF_OFFSET, %rdi      // RDI := Thread::Current()
-    // call
-    call PLT_SYMBOL(artFindNativeMethod)  // (Thread*)
+    // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable for @FastNative.
+    movq THREAD_TOP_QUICK_FRAME_OFFSET(%rdi), %rax   // uintptr_t tagged_quick_frame
+    andq LITERAL(0xfffffffffffffffe), %rax           // ArtMethod** sp
+    movq (%rax), %rax                                // ArtMethod* method
+    testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%rax)
+    jne .Llookup_stub_fast_native
+    call SYMBOL(artFindNativeMethod)  // (Thread*)
+    jmp .Llookup_stub_continue
+.Llookup_stub_fast_native:
+    call SYMBOL(artFindNativeMethodRunnable)  // (Thread*)
+.Llookup_stub_continue:
     // restore arguments
     movq 0(%rsp), %xmm0
     movq 8(%rsp), %xmm1
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index e37ed42..22d0ce4 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1886,15 +1886,11 @@
     movq %rbp, %rsi
     call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
 
-    // The C call will have registered the complete save-frame on success.
+    // The C call will have registered the complete save-frame.
     // The result of the call is:
-    // %rax: pointer to native code, 0 on error.
+    // %rax: pointer to native code.
     // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error = 0.
-    test %rax, %rax
-    jz .Lexception_in_native
-
     // Release part of the alloca.
     movq %rdx, %rsp
 
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index 422b4da..d008e1a 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -26,17 +26,16 @@
 namespace art {
 
 // Used by the JNI dlsym stub to find the native method to invoke if none is registered.
-extern "C" const void* artFindNativeMethod(Thread* self) {
-  DCHECK_EQ(self, Thread::Current());
-  Locks::mutator_lock_->AssertNotHeld(self);  // We come here as Native.
-  ScopedObjectAccess soa(self);
-
+extern "C" const void* artFindNativeMethodRunnable(Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Locks::mutator_lock_->AssertSharedHeld(self);  // We come here as Runnable.
   ArtMethod* method = self->GetCurrentMethod(nullptr);
   DCHECK(method != nullptr);
 
   // Lookup symbol address for method, on failure we'll return null with an exception set,
   // otherwise we return the address of the method we found.
-  void* native_code = soa.Vm()->FindCodeForNativeMethod(method);
+  JavaVMExt* vm = down_cast<JNIEnvExt*>(self->GetJniEnv())->GetVm();
+  void* native_code = vm->FindCodeForNativeMethod(method);
   if (native_code == nullptr) {
     self->AssertPendingException();
     return nullptr;
@@ -45,4 +44,12 @@
   return method->RegisterNative(native_code);
 }
 
+// Used by the JNI dlsym stub for normal (non-@FastNative) native methods, which arrive as Native.
+extern "C" const void* artFindNativeMethod(Thread* self) {
+  DCHECK_EQ(self, Thread::Current());
+  Locks::mutator_lock_->AssertNotHeld(self);  // We come here as Native.
+  ScopedObjectAccess soa(self);  // The Runnable variant below asserts the mutator lock is held.
+  return artFindNativeMethodRunnable(self);
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1cc4d24..3c65500 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2300,37 +2300,6 @@
   }
 }
 
-extern "C" const void* artFindNativeMethod(Thread* self);
-
-static uint64_t artQuickGenericJniEndJNIRef(Thread* self,
-                                            uint32_t cookie,
-                                            bool fast_native ATTRIBUTE_UNUSED,
-                                            jobject l,
-                                            jobject lock) {
-  // TODO: add entrypoints for @FastNative returning objects.
-  if (lock != nullptr) {
-    return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceSynchronized(l, cookie, lock, self));
-  } else {
-    return reinterpret_cast<uint64_t>(JniMethodEndWithReference(l, cookie, self));
-  }
-}
-
-static void artQuickGenericJniEndJNINonRef(Thread* self,
-                                           uint32_t cookie,
-                                           bool fast_native,
-                                           jobject lock) {
-  if (lock != nullptr) {
-    JniMethodEndSynchronized(cookie, lock, self);
-    // Ignore "fast_native" here because synchronized functions aren't very fast.
-  } else {
-    if (UNLIKELY(fast_native)) {
-      JniMethodFastEnd(cookie, self);
-    } else {
-      JniMethodEnd(cookie, self);
-    }
-  }
-}
-
 /*
  * Initializes an alloca region assumed to be directly below sp for a native call:
  * Create a HandleScope and call stack and fill a mini stack with values to be pushed to registers.
@@ -2424,40 +2393,11 @@
   }
 
   // Retrieve the stored native code.
+  // Note that it may point to the lookup stub or trampoline.
+  // FIXME: This is broken for @CriticalNative as the art_jni_dlsym_lookup_stub
+  // does not handle that case. Calls from compiled stubs are also broken.
   void const* nativeCode = called->GetEntryPointFromJni();
 
-  // There are two cases for the content of nativeCode:
-  // 1) Pointer to the native function.
-  // 2) Pointer to the trampoline for native code binding.
-  // In the second case, we need to execute the binding and continue with the actual native function
-  // pointer.
-  DCHECK(nativeCode != nullptr);
-  if (runtime->GetClassLinker()->IsJniDlsymLookupStub(nativeCode)) {
-    // FIXME: This is broken for @FastNative and @CriticalNative as we're still runnable.
-    // Calls from compiled stubs are also broken.
-    // TODO: We could just let the GenericJNI stub call the ArtFindNativeMethod()
-    // rather than calling it explicitly here.
-    nativeCode = artFindNativeMethod(self);
-
-    if (nativeCode == nullptr) {
-      DCHECK(self->IsExceptionPending());    // There should be an exception pending now.
-
-      // @CriticalNative calls do not need to call back into JniMethodEnd.
-      if (LIKELY(!critical_native)) {
-        // End JNI, as the assembly will move to deliver the exception.
-        jobject lock = called->IsSynchronized() ? visitor.GetFirstHandleScopeJObject() : nullptr;
-        if (shorty[0] == 'L') {
-          artQuickGenericJniEndJNIRef(self, cookie, fast_native, nullptr, lock);
-        } else {
-          artQuickGenericJniEndJNINonRef(self, cookie, fast_native, lock);
-        }
-      }
-
-      return GetTwoWordFailureValue();
-    }
-    // Note that the native code pointer will be automatically set by artFindNativeMethod().
-  }
-
 #if defined(__mips__) && !defined(__LP64__)
   // On MIPS32 if the first two arguments are floating-point, we need to know their types
   // so that art_quick_generic_jni_trampoline can correctly extract them from the stack
diff --git a/test/178-app-image-native-method/expected.txt b/test/178-app-image-native-method/expected.txt
index 02384cd..6327f97 100644
--- a/test/178-app-image-native-method/expected.txt
+++ b/test/178-app-image-native-method/expected.txt
@@ -1,6 +1,10 @@
 JNI_OnLoad called
 test
+testFast
 testMissing
+testMissingFast
 JNI_OnLoad called
 test
+testFast
 testMissing
+testMissingFast
diff --git a/test/178-app-image-native-method/src/Main.java b/test/178-app-image-native-method/src/Main.java
index 0a82abb..bec7740 100644
--- a/test/178-app-image-native-method/src/Main.java
+++ b/test/178-app-image-native-method/src/Main.java
@@ -33,10 +33,10 @@
 
     // FIXME: @FastNative and @CriticalNative fail a state check in artFindNativeMethod().
     test();
-    // testFast();
+    testFast();
     // testCritical();
     testMissing();
-    // testMissingFast();
+    testMissingFast();
     // testMissingCritical();
   }
 
diff --git a/tools/cpp-define-generator/globals.def b/tools/cpp-define-generator/globals.def
index 1054262..09d33ce 100644
--- a/tools/cpp-define-generator/globals.def
+++ b/tools/cpp-define-generator/globals.def
@@ -30,6 +30,8 @@
 #include "stack.h"
 #endif
 
+ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE,
+           art::kAccFastNative)
 ASM_DEFINE(ACCESS_FLAGS_CLASS_IS_FINALIZABLE,
            art::kAccClassIsFinalizable)
 ASM_DEFINE(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT,