Merge changes I16f8b7ec,I075bbf55

* changes:
  jni: Support @FastNative methods that return objects
  Revert "jni: Disable FastNative path for methods returning objects"
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index afb8fce..ca1dc69 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -387,8 +387,7 @@
 // Test the normal compiler and normal generic JNI only.
 // The following features are unsupported in @FastNative:
 // 1) JNI stubs (lookup via dlsym) when methods aren't explicitly registered
-// 2) Returning objects from the JNI function
-// 3) synchronized keyword
+// 2) synchronized keyword
 // -- TODO: We can support (1) if we remove the mutator lock assert during stub lookup.
 # define JNI_TEST_NORMAL_ONLY(TestName)          \
   TEST_F(JniCompilerTest, TestName ## NormalCompiler) { \
@@ -826,8 +825,7 @@
   gJava_MyClassNatives_fooIOO_calls[gCurrentJni] = 0;
 }
 
-// TODO: Maybe. @FastNative support for returning Objects?
-JNI_TEST_NORMAL_ONLY(CompileAndRunIntObjectObjectMethod)
+JNI_TEST(CompileAndRunIntObjectObjectMethod)
 
 int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {};
 jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED,
@@ -1047,8 +1045,7 @@
   gJava_MyClassNatives_fooSIOO_calls[gCurrentJni] = 0;
 }
 
-// TODO: Maybe. @FastNative support for returning Objects?
-JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntObjectObjectMethod)
+JNI_TEST(CompileAndRunStaticIntObjectObjectMethod)
 
 int gJava_MyClassNatives_fooSSIOO_calls[kJniKindCount] = {};
 jobject Java_MyClassNatives_fooSSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) {
@@ -1216,8 +1213,7 @@
   EXPECT_TRUE(env_->IsSameObject(result, jobj_));
 }
 
-// TODO: Maybe. @FastNative support for returning objects?
-JNI_TEST_NORMAL_ONLY(ReturnGlobalRef)
+JNI_TEST(ReturnGlobalRef)
 
 jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) {
   // Add 10 local references
@@ -1357,8 +1353,7 @@
                                     CurrentJniStringSuffix() + "() with CallStaticObjectMethodV");
 }
 
-// TODO: Maybe support returning objects for @FastNative?
-JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Instance)
+JNI_TEST(UpcallReturnTypeChecking_Instance)
 
 void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() {
   SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
@@ -1385,8 +1380,7 @@
                                     CurrentJniStringSuffix() + "() with CallObjectMethodV");
 }
 
-// TODO: Maybe support returning objects for @FastNative?
-JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Static)
+JNI_TEST(UpcallReturnTypeChecking_Static)
 
 // This should take jclass, but we're imitating a bug pattern.
 void Java_MyClassNatives_instanceMethodThatShouldTakeClass(JNIEnv*, jobject, jclass) {
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index e171441..3bd290d 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -70,6 +70,47 @@
   return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
 }
 
+enum class JniEntrypoint {
+  kStart,
+  kEnd
+};
+
+template <PointerSize kPointerSize>
+static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which,
+                                                               bool reference_return,
+                                                               bool is_synchronized,
+                                                               bool is_fast_native) {
+  if (which == JniEntrypoint::kStart) {  // JniMethodStart
+    ThreadOffset<kPointerSize> jni_start =
+        is_synchronized
+            ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+            : (is_fast_native
+                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+
+    return jni_start;
+  } else {  // JniMethodEnd
+    ThreadOffset<kPointerSize> jni_end(-1);
+    if (reference_return) {
+      // Pass result.
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEndWithReference)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference));
+    } else {
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
+    }
+
+    return jni_end;
+  }
+}
+
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
@@ -345,13 +386,11 @@
   FrameOffset locked_object_handle_scope_offset(0xBEEFDEAD);
   if (LIKELY(!is_critical_native)) {
     // Skip this for @CriticalNative methods. They do not call JniMethodStart.
-    ThreadOffset<kPointerSize> jni_start =
-        is_synchronized
-            ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
-            : ((is_fast_native && !reference_return)  // TODO: support @FastNative returning obj
-                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
-                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
-
+    ThreadOffset<kPointerSize> jni_start(
+        GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart,
+                                                   reference_return,
+                                                   is_synchronized,
+                                                   is_fast_native).SizeValue());
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     locked_object_handle_scope_offset = FrameOffset(0);
     if (is_synchronized) {
@@ -543,20 +582,15 @@
 
   if (LIKELY(!is_critical_native)) {
     // 12. Call JniMethodEnd
-    ThreadOffset<kPointerSize> jni_end(-1);
+    ThreadOffset<kPointerSize> jni_end(
+        GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
+                                                   reference_return,
+                                                   is_synchronized,
+                                                   is_fast_native).SizeValue());
     if (reference_return) {
       // Pass result.
-      jni_end = is_synchronized
-                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
-                    : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
       SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
       end_jni_conv->Next();
-    } else {
-      jni_end = is_synchronized
-                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
-                    : (is_fast_native
-                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
-                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
     }
     // Pass saved local reference state.
     if (end_jni_conv->IsCurrentParamOnStack()) {
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 593d8e9..ffeff76 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -462,7 +462,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(163 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 86a4aa2..10bed13 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -158,7 +158,7 @@
       }
       if (CompareIgnoringSpace(results[lineindex], testline) != 0) {
         LOG(FATAL) << "Output is not as expected at line: " << lineindex
-          << results[lineindex] << "/" << testline;
+          << results[lineindex] << "/" << testline << ", test name: " << testname;
       }
       ++lineindex;
     }
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 91f3970..69e1d8f 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5544,7 +5544,7 @@
   " 10c:  ecbd 8a10   vpop  {s16-s31}\n",
   " 110:  e8bd 8de0   ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
   " 114:  4660        mov r0, ip\n",
-  " 116:  f8d9 c2ac   ldr.w ip, [r9, #684]  ; 0x2ac\n",
+  " 116:  f8d9 c2b0   ldr.w ip, [r9, #688]  ; 0x2b0\n",
   " 11a:  47e0        blx ip\n",
   nullptr
 };
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 567791e..cd8815b 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 1ee1f81..df23f94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -78,6 +78,7 @@
   qpoints->pJniMethodEnd = JniMethodEnd;
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   qpoints->pJniMethodFastEnd = JniMethodFastEnd;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 89712a3..915f18e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -65,6 +65,11 @@
 extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
                                                  Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+
 
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              uint32_t saved_local_ref_cookie,
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e402919..3cfee45 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -78,6 +78,7 @@
   V(JniMethodFastEnd, void, uint32_t, Thread*) \
   V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
+  V(JniMethodFastEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
   V(QuickGenericJniTrampoline, void, ArtMethod*) \
 \
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index b25f447..330c742 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -22,6 +22,9 @@
 
 namespace art {
 
+template <bool kDynamicFast>
+static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+
 extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack,
                            Thread* self ATTRIBUTE_UNUSED) {
   DCHECK(kUseReadBarrier);
@@ -78,7 +81,28 @@
   bool is_fast = native_method->IsFastNative();
   if (!is_fast) {
     self->TransitionFromSuspendedToRunnable();
-  } else if (UNLIKELY(self->TestAllFlags())) {
+  } else {
+    GoToRunnableFast</*kDynamicFast*/true>(self);
+  }
+}
+
+// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
+template <bool kDynamicFast>
+ALWAYS_INLINE static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+  if (kIsDebugBuild) {
+    // Should only enter here if the method is !Fast JNI or @FastNative.
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+
+    if (kDynamicFast) {
+      CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
+    } else {
+      CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
+    }
+  }
+
+  // When we are in "fast" JNI or @FastNative, we are already Runnable.
+  // Only do a suspend check on the way out of JNI.
+  if (UNLIKELY(self->TestAllFlags())) {
     // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
     // is a flag raised.
     DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
@@ -106,20 +130,7 @@
 }
 
 extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
-  // inlined fast version of GoToRunnable(self);
-
-  if (kIsDebugBuild) {
-    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
-    CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
-  }
-
-  if (UNLIKELY(self->TestAllFlags())) {
-    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
-    // is a flag raised.
-    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
-    self->CheckSuspend();
-  }
-
+  GoToRunnableFast</*kDynamicFast*/false>(self);
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
@@ -131,10 +142,6 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-// TODO: JniMethodFastEndWithReference
-// (Probably don't need to have a synchronized variant since
-// it already has to do atomic operations)
-
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
                                                              uint32_t saved_local_ref_cookie,
@@ -157,6 +164,13 @@
   return o.Ptr();
 }
 
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self) {
+  GoToRunnableFast</*kDynamicFast*/false>(self);
+  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+}
+
 extern mirror::Object* JniMethodEndWithReference(jobject result,
                                                  uint32_t saved_local_ref_cookie,
                                                  Thread* self) {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 03254ab..cdb1051 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,9 +122,9 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
@@ -223,6 +223,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
+                         pJniMethodFastEndWithReference, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEndWithReference,
                          pJniMethodEndWithReferenceSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized,
                          pQuickGenericJniTrampoline, sizeof(void*));
diff --git a/runtime/oat.h b/runtime/oat.h
index 4d8687c..814a493 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '0', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.h b/runtime/thread.h
index 6f5913e..24038f5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1392,7 +1392,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1506,12 +1506,13 @@
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
-    // Thread-local allocation pointer.
-    uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
+    // Thread-local allocation pointer.
+    uint8_t* thread_local_start;
+
     size_t thread_local_objects;
 
     // Mterp jump table bases.