Use memory chunks for monitors on LP64

Monitor IDs in lock words are only 30 bits. On a 32-bit system that works
fine, as memory is usually aligned enough that shifting works out.
On 64-bit systems, the virtual memory space is too large for that.
This adds memory chunks into which we allocate the monitors so that
we have base_addr + offset and can use the offset as the monitor ID.
To allow for relatively compact but growable storage, we use a list
of chunks.

Added a global lock for the monitor pool.

Change-Id: I0e290c4914a2556e0b2eef9902422d7c4dcf536d
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 32e1553..5bc28f1 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -20,11 +20,11 @@
 #include "monitor.h"
 
 #ifdef __LP64__
-#include <bitset>
 #include <stdint.h>
-
+#include "atomic.h"
 #include "runtime.h"
-#include "safe_map.h"
+#else
+#include "base/stl_util.h"     // STLDeleteElements
 #endif
 
 namespace art {
@@ -41,11 +41,36 @@
 #endif
   }
 
+  static Monitor* CreateMonitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#ifndef __LP64__
+    return new Monitor(self, owner, obj, hash_code);
+#else
+    return GetMonitorPool()->CreateMonitorInPool(self, owner, obj, hash_code);
+#endif
+  }
+
+  static void ReleaseMonitor(Thread* self, Monitor* monitor) {
+#ifndef __LP64__
+    delete monitor;
+#else
+    GetMonitorPool()->ReleaseMonitorToPool(self, monitor);
+#endif
+  }
+
+  static void ReleaseMonitors(Thread* self, std::list<Monitor*>* monitors) {
+#ifndef __LP64__
+    STLDeleteElements(monitors);
+#else
+    GetMonitorPool()->ReleaseMonitorsToPool(self, monitors);
+#endif
+  }
+
   static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
 #ifndef __LP64__
     return reinterpret_cast<Monitor*>(mon_id << 3);
 #else
-    return Runtime::Current()->GetMonitorPool()->LookupMonitorFromTable(mon_id);
+    return GetMonitorPool()->LookupMonitor(mon_id);
 #endif
   }
 
@@ -57,39 +82,98 @@
 #endif
   }
 
-  static MonitorId CreateMonitorId(Thread* self, Monitor* mon) {
+  static MonitorId ComputeMonitorId(Monitor* mon, Thread* self) {
 #ifndef __LP64__
-    UNUSED(self);
     return MonitorIdFromMonitor(mon);
 #else
-    return Runtime::Current()->GetMonitorPool()->AllocMonitorIdFromTable(self, mon);
+    return GetMonitorPool()->ComputeMonitorIdInPool(mon, self);
 #endif
   }
 
-  static void ReleaseMonitorId(MonitorId mon_id) {
+  static MonitorPool* GetMonitorPool() {
 #ifndef __LP64__
-    UNUSED(mon_id);
+    return nullptr;
 #else
-    Runtime::Current()->GetMonitorPool()->ReleaseMonitorIdFromTable(mon_id);
+    return Runtime::Current()->GetMonitorPool();
 #endif
   }
 
  private:
 #ifdef __LP64__
-  MonitorPool();
+  // When we create the monitor pool, threads have not been initialized yet, so we must
+  // disable thread-safety analysis.
+  MonitorPool() NO_THREAD_SAFETY_ANALYSIS;
 
-  Monitor* LookupMonitorFromTable(MonitorId mon_id);
+  void AllocateChunk() EXCLUSIVE_LOCKS_REQUIRED(Locks::allocated_monitor_ids_lock_);
 
-  MonitorId LookupMonitorIdFromTable(Monitor* mon);
+  Monitor* CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  MonitorId AllocMonitorIdFromTable(Thread* self, Monitor* mon);
+  void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
+  void ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors);
 
-  void ReleaseMonitorIdFromTable(MonitorId mon_id);
+  // Note: This is safe as we do not ever move chunks.
+  Monitor* LookupMonitor(MonitorId mon_id) {
+    size_t offset = MonitorIdToOffset(mon_id);
+    size_t index = offset / kChunkSize;
+    size_t offset_in_chunk = offset % kChunkSize;
+    uintptr_t base = *(monitor_chunks_.LoadRelaxed()+index);
+    return reinterpret_cast<Monitor*>(base + offset_in_chunk);
+  }
 
-  ReaderWriterMutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  static constexpr uint32_t kMaxMonitorId = 0xFFFF;
-  std::bitset<kMaxMonitorId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
-  SafeMap<MonitorId, Monitor*> table_ GUARDED_BY(allocated_ids_lock_);
+  static bool IsInChunk(uintptr_t base_addr, Monitor* mon) {
+    uintptr_t mon_ptr = reinterpret_cast<uintptr_t>(mon);
+    return base_addr <= mon_ptr && (mon_ptr - base_addr < kChunkSize);
+  }
+
+  // Note: This is safe as we do not ever move chunks.
+  MonitorId ComputeMonitorIdInPool(Monitor* mon, Thread* self) {
+    MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+    for (size_t index = 0; index < num_chunks_; ++index) {
+      uintptr_t chunk_addr = *(monitor_chunks_.LoadRelaxed() + index);
+      if (IsInChunk(chunk_addr, mon)) {
+        return OffsetToMonitorId(reinterpret_cast<uintptr_t>(mon) - chunk_addr + index * kChunkSize);
+      }
+    }
+    LOG(FATAL) << "Did not find chunk that contains monitor.";
+    return 0;
+  }
+
+  static size_t MonitorIdToOffset(MonitorId id) {
+    return id << 3;
+  }
+
+  static MonitorId OffsetToMonitorId(size_t offset) {
+    return static_cast<MonitorId>(offset >> 3);
+  }
+
+  // TODO: There are assumptions in the code that monitor addresses are 8B aligned (>>3).
+  static constexpr size_t kMonitorAlignment = 8;
+  // Size of a monitor, rounded up to a multiple of alignment.
+  static constexpr size_t kAlignedMonitorSize = (sizeof(Monitor) + kMonitorAlignment - 1) &
+                                                -kMonitorAlignment;
+  // As close to a page as we can get seems a good start.
+  static constexpr size_t kChunkCapacity = kPageSize / kAlignedMonitorSize;
+  // Chunk size that is referenced in the id. We could shrink this to the storage actually used
+  // in a chunk, i.e., kChunkCapacity * kAlignedMonitorSize, but that would require real divisions
+  static constexpr size_t kChunkSize = kPageSize;
+  // The number of initial chunks storable in monitor_chunks_. The number is large enough to make
+  // resizing unlikely, but small enough to not waste too much memory.
+  static constexpr size_t kInitialChunkStorage = 8U;
+
+  // List of memory chunks. Each chunk is kChunkSize.
+  Atomic<uintptr_t*> monitor_chunks_;
+  // Number of chunks stored.
+  size_t num_chunks_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // Number of chunks storable.
+  size_t capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+  // To avoid race issues when resizing, we keep all the previous arrays.
+  std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+  // Start of free list of monitors.
+  // Note: these point to the right memory regions, but do *not* denote initialized objects.
+  Monitor* first_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 #endif
 };