Merge "ART: Add lock and unlock stubs for ARM64, fix for X86-64"
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index dcf4561..4886561 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -442,8 +442,8 @@
     cmp    r1, #65536
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked, r3 holds 0
-    str    r3, [r0, #LOCK_WORD_OFFSET]
     dmb    ish                        @ full (StoreLoad) memory barrier
+    str    r3, [r0, #LOCK_WORD_OFFSET]
     bx     lr
 .Lrecursive_thin_unlock:
     sub    r1, r1, #65536
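
The arm32 hunk above only swaps two instructions, but that swap is the whole point: the dmb ish has to execute before the store that publishes the zeroed (unlocked) lock word, so that writes made inside the critical section are visible to whoever acquires the lock next. A minimal C++ sketch of the same ordering requirement, using std::atomic instead of ART's real lock word (all names here are illustrative, not ART's API):

    #include <atomic>
    #include <cstdint>

    // Illustrative stand-in for a thin-lock word: zero means "unlocked".
    std::atomic<uint32_t> lock_word{0};
    int guarded_data = 0;

    void UnlockCorrect() {
      guarded_data++;                    // work done while holding the lock
      // Release ordering: all earlier writes become visible before another thread
      // can observe the word as unlocked -- the "dmb ish; str" order in the fix.
      lock_word.store(0, std::memory_order_release);
    }

    void UnlockBroken() {
      guarded_data++;
      // The pre-fix "str; dmb ish" order: the word may be observed as unlocked
      // before guarded_data's update is ordered, because a fence issued after
      // the store does not give that store release semantics.
      lock_word.store(0, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    int main() {
      UnlockCorrect();
      UnlockBroken();
      return 0;
    }
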
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index f7cb254..c056b2f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -998,8 +998,81 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_handle_fill_data
 
-UNIMPLEMENTED art_quick_lock_object
-UNIMPLEMENTED art_quick_unlock_object
+    /*
+     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
+     * possibly null object to lock.
+     *
+     * Derived from arm32 code.
+     */
+    .extern artLockObjectFromCode
+ENTRY art_quick_lock_object
+    cbz    w0, .Lslow_lock
+    add    x4, x0, #LOCK_WORD_OFFSET  // exclusive load/store has no immediate offset form
+.Lretry_lock:
+    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
+    ldxr   w1, [x4]
+    cbnz   w1, .Lnot_unlocked         // not unlocked: thin locked, fat locked or hash code
+    stxr   w3, w2, [x4]
+    cbnz   w3, .Lstrex_fail           // store failed, retry
+    dmb    ishld                      // LoadLoad|LoadStore barrier, TODO: acquire-release
+    ret
+.Lstrex_fail:
+    b      .Lretry_lock               // unlikely path, need to reload and recheck w1/w2
+.Lnot_unlocked:
+    lsr    w3, w1, 30
+    cbnz   w3, .Lslow_lock            // if either of the top two bits is set, go slow path
+    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
+    uxth   w2, w2                     // zero top 16 bits
+    cbnz   w2, .Lslow_lock            // thread ids differ -> contention, go to slow path
+                                      // else same thread -> recursive thin lock, fall through
+    add    w2, w1, #65536             // increment count in lock word, placing result in w2
+    lsr    w1, w2, 30                 // if either of the top two bits is set, we overflowed
+    cbnz   w1, .Lslow_lock            // if the count overflowed, go slow path
+    str    w2, [x0, #LOCK_WORD_OFFSET]  // no need for stxr as we hold the lock
+    ret
+.Lslow_lock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    mov    x1, xSELF                  // pass Thread::Current
+    mov    x2, sp                     // pass SP
+    bl     artLockObjectFromCode      // (Object* obj, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_lock_object
+
+    /*
+     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+     * x0 holds the possibly null object to unlock.
+     *
+     * Derived from arm32 code.
+     */
+    .extern artUnlockObjectFromCode
+ENTRY art_quick_unlock_object
+    cbz    x0, .Lslow_unlock
+    ldr    w1, [x0, #LOCK_WORD_OFFSET]
+    lsr    w2, w1, 30
+    cbnz   w2, .Lslow_unlock          // if either of the top two bits is set, go slow path
+    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
+    eor    w3, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
+    uxth   w3, w3                     // zero top 16 bits
+    cbnz   w3, .Lslow_unlock          // if thread ids don't match, go slow path
+    cmp    w1, #65536
+    b.pl   .Lrecursive_thin_unlock
+    // transition to unlocked, w3 holds 0
+    dmb    ish                        // full (StoreLoad) memory barrier
+    str    w3, [x0, #LOCK_WORD_OFFSET]
+    ret
+.Lrecursive_thin_unlock:
+    sub    w1, w1, #65536
+    str    w1, [x0, #LOCK_WORD_OFFSET]
+    ret
+.Lslow_unlock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    mov    x1, xSELF                  // pass Thread::Current
+    mov    x2, sp                     // pass SP
+    bl     artUnlockObjectFromCode    // (Object* obj, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_unlock_object
 
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
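
For readers who do not read A64, the new stubs implement ART's thin-lock fast path: an unlocked word (zero) is claimed by installing the caller's thread id via an exclusive load/store pair, a re-entrant lock by the owning thread bumps a count kept above bit 16, and any word with one of the top two bits set, or owned by another thread, falls back to artLockObjectFromCode. A rough C++ sketch of that decision logic, with a CAS standing in for ldxr/stxr; the layout constants below are inferred from the shifts in the stub, not taken from ART's lock_word.h:

    #include <atomic>
    #include <cstdint>

    // Illustrative thin-lock word layout, inferred from the shifts in the stub:
    //   bits 31..30: state bits (both clear = thin lock or unlocked),
    //   bits 29..16: recursion count, bits 15..0: owner thread id.
    constexpr uint32_t kStateMask = 0xC0000000u;
    constexpr uint32_t kCountUnit = 1u << 16;           // the "add ..., #65536" in the stub

    enum class FastPath { kAcquired, kRecursive, kSlowPath };

    FastPath ThinLockFastPath(std::atomic<uint32_t>* word, uint32_t thread_id) {
      uint32_t old_word = word->load(std::memory_order_relaxed);
      if (old_word == 0) {
        // The ldxr/stxr pair in the stub; on store failure the stub retries,
        // simplified here to the slow path.
        if (word->compare_exchange_strong(old_word, thread_id,
                                          std::memory_order_acquire)) {
          return FastPath::kAcquired;
        }
        return FastPath::kSlowPath;
      }
      if ((old_word & kStateMask) != 0) {
        return FastPath::kSlowPath;                     // fat lock or hash code installed
      }
      if ((old_word & 0xFFFFu) != (thread_id & 0xFFFFu)) {
        return FastPath::kSlowPath;                     // held by another thread: contention
      }
      uint32_t new_word = old_word + kCountUnit;        // recursive acquire: bump the count
      if ((new_word & kStateMask) != 0) {
        return FastPath::kSlowPath;                     // recursion count overflowed
      }
      word->store(new_word, std::memory_order_relaxed); // we already own the lock, plain store
      return FastPath::kRecursive;
    }

    int main() {
      std::atomic<uint32_t> word{0};
      const uint32_t tid = 42;
      // First call installs the thread id, second call bumps the recursion count.
      bool ok = ThinLockFastPath(&word, tid) == FastPath::kAcquired &&
                ThinLockFastPath(&word, tid) == FastPath::kRecursive;
      return ok ? 0 : 1;
    }
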
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 86f52aa..76d028d 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -56,6 +56,7 @@
     return &self->tlsPtr_;
   }
 
+ public:
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
     // Push a transition back into managed code onto the linked list in thread.
     ManagedStack fragment;
@@ -169,7 +170,6 @@
     return result;
   }
 
- public:
   // TODO: Set up a frame according to referrer's specs.
   size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
                              mirror::ArtMethod* referrer) {
@@ -357,12 +357,12 @@
 #endif
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_lock_object(void);
 #endif
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -396,8 +396,21 @@
     EXPECT_EQ(l_inc.ThinLockCount(), i);
   }
 
-  // TODO: Improve this test. Somehow force it to go to fat locked. But that needs another thread.
+  // Force a fat lock by installing an identity hash code, which occupies the lock word.
+  SirtRef<mirror::Object> obj2(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(),
+                                                                                 "hello, world!"));
 
+  obj2->IdentityHashCode();
+
+  Invoke3(reinterpret_cast<size_t>(obj2.get()), 0U, 0U,
+          reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+
+  LockWord lock_after2 = obj2->GetLockWord(false);
+  LockWord::LockState new_state2 = lock_after2.GetState();
+  EXPECT_EQ(LockWord::LockState::kFatLocked, new_state2);
+  EXPECT_NE(lock_after2.FatLockMonitor(), static_cast<Monitor*>(nullptr));
+
+  // Test done.
 #else
   LOG(INFO) << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
@@ -419,13 +432,14 @@
 };
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_lock_object(void);
 extern "C" void art_quick_unlock_object(void);
 #endif
 
-TEST_F(StubTest, UnlockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+// NO_THREAD_SAFETY_ANALYSIS as we do not want to grab the exclusive mutator lock for MonitorInfo.
+static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -439,8 +453,8 @@
   LockWord::LockState old_state = lock.GetState();
   EXPECT_EQ(LockWord::LockState::kUnlocked, old_state);
 
-  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
-          reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+  test->Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
 
   // This should be an illegal monitor state.
   EXPECT_TRUE(self->IsExceptionPending());
@@ -450,15 +464,15 @@
   LockWord::LockState new_state = lock_after.GetState();
   EXPECT_EQ(LockWord::LockState::kUnlocked, new_state);
 
-  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
-          reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+  test->Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
 
   LockWord lock_after2 = obj->GetLockWord(false);
   LockWord::LockState new_state2 = lock_after2.GetState();
   EXPECT_EQ(LockWord::LockState::kThinLocked, new_state2);
 
-  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
-          reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+  test->Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
 
   LockWord lock_after3 = obj->GetLockWord(false);
   LockWord::LockState new_state3 = lock_after3.GetState();
@@ -472,13 +486,16 @@
 
   constexpr size_t kNumberOfLocks = 10;  // Number of objects = lock
   constexpr size_t kIterations = 10000;  // Number of iterations
+  constexpr size_t kMoveToFat = 1000;   // 1-in-kMoveToFat chance of making a lock fat.
 
   size_t counts[kNumberOfLocks];
+  bool fat[kNumberOfLocks];  // Whether each lock has been inflated to a fat lock.
   SirtRef<mirror::String>* objects[kNumberOfLocks];
 
   // Initialize = allocate.
   for (size_t i = 0; i < kNumberOfLocks; ++i) {
     counts[i] = 0;
+    fat[i] = false;
     objects[i] = new SirtRef<mirror::String>(soa.Self(),
                                              mirror::String::AllocFromModifiedUtf8(soa.Self(), ""));
   }
@@ -487,36 +504,57 @@
     // Select which lock to update.
     size_t index = r.next() % kNumberOfLocks;
 
-    bool lock;  // Whether to lock or unlock in this step.
-    if (counts[index] == 0) {
-      lock = true;
-    } else if (counts[index] == kThinLockLoops) {
-      lock = false;
-    } else {
-      // Randomly.
-      lock = r.next() % 2 == 0;
-    }
+    // Make lock fat?
+    if (!fat[index] && (r.next() % kMoveToFat == 0)) {
+      fat[index] = true;
+      objects[index]->get()->IdentityHashCode();
 
-    if (lock) {
-      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
-              reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
-      counts[index]++;
+      LockWord lock_iter = objects[index]->get()->GetLockWord(false);
+      LockWord::LockState iter_state = lock_iter.GetState();
+      if (counts[index] == 0) {
+        EXPECT_EQ(LockWord::LockState::kHashCode, iter_state);
+      } else {
+        EXPECT_EQ(LockWord::LockState::kFatLocked, iter_state);
+      }
     } else {
-      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
-              reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
-      counts[index]--;
-    }
+      bool lock;  // Whether to lock or unlock in this step.
+      if (counts[index] == 0) {
+        lock = true;
+      } else if (counts[index] == kThinLockLoops) {
+        lock = false;
+      } else {
+        // Randomly.
+        lock = r.next() % 2 == 0;
+      }
 
-    EXPECT_FALSE(self->IsExceptionPending());
+      if (lock) {
+        test->Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+                      reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+        counts[index]++;
+      } else {
+        test->Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+                      reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+        counts[index]--;
+      }
 
-    // Check the new state.
-    LockWord lock_iter = objects[index]->get()->GetLockWord(false);
-    LockWord::LockState iter_state = lock_iter.GetState();
-    if (counts[index] > 0) {
-      EXPECT_EQ(LockWord::LockState::kThinLocked, iter_state);
-      EXPECT_EQ(counts[index] - 1, lock_iter.ThinLockCount());
-    } else {
-      EXPECT_EQ(LockWord::LockState::kUnlocked, iter_state);
+      EXPECT_FALSE(self->IsExceptionPending());
+
+      // Check the new state.
+      LockWord lock_iter = objects[index]->get()->GetLockWord(true);
+      LockWord::LockState iter_state = lock_iter.GetState();
+      if (fat[index]) {
+        // Abuse MonitorInfo.
+        EXPECT_EQ(LockWord::LockState::kFatLocked, iter_state) << index;
+        MonitorInfo info(objects[index]->get());
+        EXPECT_EQ(counts[index], info.entry_count_) << index;
+      } else {
+        if (counts[index] > 0) {
+          EXPECT_EQ(LockWord::LockState::kThinLocked, iter_state);
+          EXPECT_EQ(counts[index] - 1, lock_iter.ThinLockCount());
+        } else {
+          EXPECT_EQ(LockWord::LockState::kUnlocked, iter_state);
+        }
+      }
     }
   }
 
@@ -526,21 +564,21 @@
     size_t index = kNumberOfLocks - 1 - i;
     size_t count = counts[index];
     while (count > 0) {
-      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
-              reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+      test->Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+                    reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
 
       count--;
     }
 
     LockWord lock_after4 = objects[index]->get()->GetLockWord(false);
     LockWord::LockState new_state4 = lock_after4.GetState();
-    EXPECT_EQ(LockWord::LockState::kUnlocked, new_state4);
+    EXPECT_TRUE(LockWord::LockState::kUnlocked == new_state4
+                || LockWord::LockState::kFatLocked == new_state4);
 
     delete objects[index];
   }
 
-  // TODO: Improve this test. Somehow force it to go to fat locked. But that needs another thread.
-
+  // Test done.
 #else
   LOG(INFO) << "Skipping unlock_object as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
@@ -548,6 +586,9 @@
 #endif
 }
 
+TEST_F(StubTest, UnlockObject) {
+  TestUnlockObject(this);
+}
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_check_cast(void);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index a55dbb6..b886fb0 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -697,8 +697,8 @@
     jz   .Lslow_unlock
     movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test %ecx, %ecx
-    jb   .Lslow_unlock                    // lock word contains a monitor
+    test LITERAL(0xC0000000), %ecx
+    jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
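
The x86-64 hunk fixes a dead check in art_quick_unlock_object: TEST always clears CF, so the old "test %ecx, %ecx; jb" sequence could never take the branch and fat-locked words were not reliably diverted to the slow path. The fix tests bits 30 and 31 explicitly and branches on ZF. The same predicate expressed in C++, as a sketch (the 0xC0000000 mask comes from the diff; the helper names and sample value are illustrative):

    #include <cstdint>

    // Bits 31..30 of the lock word are the state bits; either one set means the
    // word is not a plain thin lock (fat monitor or hash code), so unlock must
    // take the slow path.
    constexpr uint32_t kLockWordStateMask = 0xC0000000u;

    // Old code: "test %ecx, %ecx; jb ..." -- TEST clears CF, so a CF-based
    // branch can never be taken, regardless of the lock word's contents.
    inline bool OldCheckNeverTrue(uint32_t /*lock_word*/) { return false; }

    // Fixed code: "test $0xC0000000, %ecx; jnz .Lslow_unlock".
    inline bool NeedsSlowUnlock(uint32_t lock_word) {
      return (lock_word & kLockWordStateMask) != 0;
    }

    int main() {
      const uint32_t inflated_word = 0x40000000u;  // a state bit set (illustrative value)
      return (NeedsSlowUnlock(inflated_word) && !OldCheckNeverTrue(inflated_word)) ? 0 : 1;
    }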