Inflate contended lock word by suspending owner.
Bug 6961405.
Don't inflate monitors for Notify and NotifyAll.
Tidy lock word, handle recursive lock case alongside unlocked case and move
assembly out of line (except for ARM quick). Also handle null in out-of-line
assembly as the test is quick and the enter/exit code is already a safepoint.
To gain ownership of a monitor on behalf of another thread, monitor contenders
must not hold the monitor_lock_, so they wait on a condition variable.
Reduce size of per mutex contention log.
Be consistent in calling thin lock thread ids just thread ids.
Fix potential thread death races caused by the use of FindThreadByThreadId;
make it invariant that returned threads are now either self or suspended.
Code size reduction on ARM boot.oat 0.2%.
Old nexus 7 speedup 0.25%, new nexus 7 speedup 1.4%, nexus 10 speedup 2.24%,
nexus 4 speedup 2.09% on DeltaBlue.
Change-Id: Id52558b914f160d9c8578fdd7fc8199a9598576a
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5b2dd6c..cb61698 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -318,22 +318,67 @@
END art_quick_handle_fill_data
/*
- * Entry from managed code that calls artLockObjectFromCode, may block for GC.
+ * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
+ * possibly null object to lock.
*/
.extern artLockObjectFromCode
ENTRY art_quick_lock_object
+ cbz r0, slow_lock @ null object: handled by the runtime call on the slow path
+retry_lock:
+ ldrex r1, [r0, #LOCK_WORD_OFFSET] @ r1 = lock word, exclusive load paired with the strex below
+ ldr r2, [r9, #THREAD_ID_OFFSET] @ r2 = self thread id (r9 is passed as Thread::Current below)
+ cmp r1, #0
+ bmi slow_lock @ lock word contains a monitor
+ bne already_thin @ non-zero lock word: thin lock already held by some thread
+ @ unlocked case - r2 holds thread id with count of 0
+ strex r3, r2, [r0, #LOCK_WORD_OFFSET] @ r3 = 0 if the exclusive store succeeded
+ cbnz r3, strex_fail @ store failed, retry
+ bx lr @ NOTE(review): no barrier is issued after the acquire on this path - confirm ordering is handled elsewhere
+strex_fail:
+ b retry_lock @ unlikely; cbnz can only branch forward, so hop back from here to reload and recheck r1/r2
+already_thin:
+ eor r2, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
+ uxth r2, r2 @ zero top 16 bits
+ cbnz r2, slow_lock @ lock word and self thread id's differ -> contention, go to slow path
+ @ else thread id's match -> recursive lock
+ adds r2, r1, #65536 @ increment count in lock word placing in r2 for storing
+ bmi slow_lock @ if we overflow the count go slow
+ str r2, [r0, #LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+ bx lr
+slow_lock:
SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves in case we block
mov r1, r9 @ pass Thread::Current
mov r2, sp @ pass SP
bl artLockObjectFromCode @ (Object* obj, Thread*, SP)
- RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+ RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+ RETURN_IF_RESULT_IS_ZERO
+ DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
/*
* Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+ * r0 holds the possibly null object to unlock.
*/
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
+ cbz r0, slow_unlock
+ ldr r1, [r0, #LOCK_WORD_OFFSET]
+ ldr r2, [r9, #THREAD_ID_OFFSET]
+ cmp r1, #0
+ bmi slow_unlock @ lock word contains a monitor
+ eor r3, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
+ uxth r3, r3 @ zero top 16 bits
+ cbnz r3, slow_unlock @ do lock word and self thread id's match?
+ cmp r1, #65536
+ bpl recursive_thin_unlock
+ @ transition to unlocked, r3 holds 0
+ str r3, [r0, #LOCK_WORD_OFFSET]
+ bx lr
+recursive_thin_unlock:
+ sub r1, r1, #65536
+ str r1, [r0, #LOCK_WORD_OFFSET]
+ bx lr
+slow_unlock:
SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves in case exception allocation triggers GC
mov r1, r9 @ pass Thread::Current
mov r2, sp @ pass SP