Add native memory accounting through a custom allocator.

Added a custom allocator that lets you pass in a tag which specifies
where the allocation came from. The tags are used when dumping native
memory usage. The performance overhead is low since only an atomic
add/sub is done for each allocation/free.
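
As a rough illustration (not the actual ART implementation), the idea is a
std::allocator-compatible wrapper parameterized on a tag, where allocate/
deallocate bump per-tag atomic counters. The names below (AllocatorTag,
TagCounters, g_counters, TrackingAllocator) are illustrative assumptions:

  // Sketch of a tag-tracking allocator: one atomic add on allocate, one
  // atomic sub on free, plus a running total and a high-water mark per tag.
  #include <atomic>
  #include <cstddef>
  #include <cstdlib>
  #include <list>

  enum AllocatorTag { kTagMonitorList, kTagClassTable, kTagCount };

  struct TagCounters {
    std::atomic<size_t> active{0};  // bytes currently allocated
    std::atomic<size_t> max{0};     // high-water mark of active
    std::atomic<size_t> total{0};   // bytes ever allocated
  };
  static TagCounters g_counters[kTagCount];

  template <typename T, AllocatorTag kTag>
  class TrackingAllocator {
   public:
    using value_type = T;
    template <typename U>
    struct rebind { using other = TrackingAllocator<U, kTag>; };

    TrackingAllocator() = default;
    template <typename U>
    TrackingAllocator(const TrackingAllocator<U, kTag>&) {}

    T* allocate(size_t n) {
      const size_t bytes = n * sizeof(T);
      const size_t now = g_counters[kTag].active.fetch_add(bytes) + bytes;
      g_counters[kTag].total.fetch_add(bytes);
      // Best-effort high-water mark update.
      size_t prev = g_counters[kTag].max.load();
      while (now > prev &&
             !g_counters[kTag].max.compare_exchange_weak(prev, now)) {}
      return static_cast<T*>(malloc(bytes));
    }

    void deallocate(T* p, size_t n) {
      g_counters[kTag].active.fetch_sub(n * sizeof(T));
      free(p);
    }
  };

  template <typename T, typename U, AllocatorTag kTag>
  bool operator==(const TrackingAllocator<T, kTag>&,
                  const TrackingAllocator<U, kTag>&) { return true; }
  template <typename T, typename U, AllocatorTag kTag>
  bool operator!=(const TrackingAllocator<T, kTag>&,
                  const TrackingAllocator<U, kTag>&) { return false; }

  // Containers then route their node allocations through a tag, e.g.:
  using TaggedMonitorList =
      std::list<void*, TrackingAllocator<void*, kTagMonitorList>>;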

The measurements are dumped to traces.txt when the runtime receives SIGQUIT.

Example output:
I/art     (27274): AllocatorTagHeap active=120 max=120 total=168
I/art     (27274): AllocatorTagMonitorList active=1572 max=6240 total=11724
I/art     (27274): AllocatorTagClassTable active=185208 max=185208 total=268608
I/art     (27274): AllocatorTagInternTable active=430368 max=430368 total=436080
I/art     (27274): AllocatorTagMaps active=5616 max=6168 total=34392
I/art     (27274): AllocatorTagLOS active=1024 max=1536 total=2044
I/art     (27274): AllocatorTagSafeMap active=0 max=51936 total=533688
I/art     (27274): AllocatorTagLOSMaps active=144 max=1248 total=5760
I/art     (27274): AllocatorTagReferenceTable active=10944 max=11840 total=19136
I/art     (27274): AllocatorTagHeapBitmap active=32 max=40 total=56
I/art     (27274): AllocatorTagHeapBitmapLOS active=8 max=8 total=8
I/art     (27274): AllocatorTagVerifier active=0 max=18844 total=1073156
I/art     (27274): AllocatorTagModUnionCardSet active=5300 max=5920 total=56020
I/art     (27274): AllocatorTagModUnionReferenceArray active=24864 max=24864 total=24864
I/art     (27274): AllocatorTagJNILibrarires active=320 max=320 total=320
I/art     (27274): AllocatorTagOatFile active=1400 max=1400 total=5852
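
A hedged sketch of the dump side, reusing the TagCounters/g_counters from
the sketch above; TagName and DumpTrackedAllocations are made-up names, not
the actual ART functions. It just walks every tag and prints one
active/max/total line per tag in the format shown above:

  #include <cstdio>

  // Human-readable name for each tag; purely illustrative.
  static const char* TagName(AllocatorTag tag) {
    switch (tag) {
      case kTagMonitorList: return "AllocatorTagMonitorList";
      case kTagClassTable:  return "AllocatorTagClassTable";
      default:              return "AllocatorTagUnknown";
    }
  }

  // Called from the SIGQUIT dump path; one line per tag.
  void DumpTrackedAllocations(FILE* out) {
    for (int i = 0; i < kTagCount; ++i) {
      const TagCounters& c = g_counters[i];
      fprintf(out, "%s active=%zu max=%zu total=%zu\n",
              TagName(static_cast<AllocatorTag>(i)),
              c.active.load(), c.max.load(), c.total.load());
    }
  }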

Change-Id: Ibb470ef2e9c9a24563bb46422d46a55799704d82

(cherry picked from commit 5369c40f75fdcb1be7a7c06db212ce965c83a164)
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 5bc28f1..cb45162 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -19,6 +19,7 @@
 
 #include "monitor.h"
 
+#include "base/allocator.h"
 #ifdef __LP64__
 #include <stdint.h>
 #include "atomic.h"
@@ -58,7 +59,7 @@
 #endif
   }
 
-  static void ReleaseMonitors(Thread* self, std::list<Monitor*>* monitors) {
+  static void ReleaseMonitors(Thread* self, MonitorList::Monitors* monitors) {
 #ifndef __LP64__
     STLDeleteElements(monitors);
 #else
@@ -110,7 +111,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
-  void ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors);
+  void ReleaseMonitorsToPool(Thread* self, MonitorList::Monitors* monitors);
 
   // Note: This is safe as we do not ever move chunks.
   Monitor* LookupMonitor(MonitorId mon_id) {
@@ -171,6 +172,9 @@
   // To avoid race issues when resizing, we keep all the previous arrays.
   std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 
+  typedef TrackingAllocator<byte, kAllocatorTagMonitorPool> Allocator;
+  Allocator allocator_;
+
   // Start of free list of monitors.
   // Note: these point to the right memory regions, but do *not* denote initialized objects.
   Monitor* first_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);