Speed up and slightly simplify Mutex

Eliminate the separate seqentially consistent contention counter load
in Mutex::ExclusiveUnlock by putting the contenion counter in the
Mutex's state word.

Replace some CHECK_GE checks with CHECK_GT. We were checking quantities
intended to be non-negative against >= 0 just before decrementing them.

Remove a pointless volatile declaration.

Introduce constants for the first FUTEX_WAKE argument. Remove all uses
of -1 as that argument, everywhere in ART. It appears to work, but the
documentation says it's wrong.

This does not yet address the ReaderWriterMutex issue, which is handled
in a different way in a separate CL.

Benchmark runs with and without this CL weakly suggest a tiny, not
statistically significant, improvement in both time and space with this
CL.

Bug: 111835365
Test: Build and boot AOSP. TreeHugger.
Change-Id: Ie53c65f2ce774a8cb4d224e2c1b3a110eb880f0c
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 555053b..48359fe 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_BASE_MUTEX_H_
 #define ART_RUNTIME_BASE_MUTEX_H_
 
+#include <limits.h>  // for INT_MAX
 #include <pthread.h>
 #include <stdint.h>
 #include <unistd.h>  // for pid_t
@@ -59,6 +60,9 @@
 #ifdef ART_USE_FUTEXES
 // To enable lock contention logging, set this to true.
 constexpr bool kLogLockContentions = false;
+// FUTEX_WAKE first argument:
+constexpr int kWakeOne = 1;
+constexpr int kWakeAll = INT_MAX;
 #else
 // Keep this false as lock contention logging is supported only with
 // futex.
@@ -191,8 +195,9 @@
     AssertNotHeldExclusive(self);
   }
 
-  // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
-  // than the owner.
+  // Id associated with exclusive owner. No memory ordering semantics if called from a thread
+  // other than the owner. GetTid() == GetExclusiveOwnerTid() is a reliable way to determine
+  // whether we hold the lock; any other information may be invalidated before we return.
   pid_t GetExclusiveOwnerTid() const;
 
   // Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
@@ -209,12 +214,33 @@
 
  private:
 #if ART_USE_FUTEXES
-  // 0 is unheld, 1 is held.
-  AtomicInteger state_;
+  // Low order bit: 0 is unheld, 1 is held.
+  // High order bits: Number of waiting contenders.
+  AtomicInteger state_and_contenders_;
+
+  static constexpr int32_t kHeldMask = 1;
+
+  static constexpr int32_t kContenderShift = 1;
+
+  static constexpr int32_t kContenderIncrement = 1 << kContenderShift;
+
+  void increment_contenders() {
+    state_and_contenders_.fetch_add(kContenderIncrement);
+  }
+
+  void decrement_contenders() {
+    state_and_contenders_.fetch_sub(kContenderIncrement);
+  }
+
+  int32_t get_contenders() {
+    // Result is guaranteed to include any contention added by this thread; otherwise approximate.
+    // Treat contenders as unsigned because we're paranoid about overflow; should never matter.
+    return static_cast<uint32_t>(state_and_contenders_.load(std::memory_order_relaxed))
+        >> kContenderShift;
+  }
+
   // Exclusive owner.
   Atomic<pid_t> exclusive_owner_;
-  // Number of waiting contenders.
-  AtomicInteger num_contenders_;
 #else
   pthread_mutex_t mutex_;
   Atomic<pid_t> exclusive_owner_;  // Guarded by mutex_. Asynchronous reads are OK.
@@ -421,7 +447,7 @@
   AtomicInteger sequence_;
   // Number of threads that have come into to wait, not the length of the waiters on the futex as
   // waiters may have been requeued onto guard_. Guarded by guard_.
-  volatile int32_t num_waiters_;
+  int32_t num_waiters_;
 
   void RequeueWaiters(int32_t count);
 #else