RosAlloc verification.

If enabled, RosAlloc verification checks the allocator internal
metadata and invariants to detect bugs, heap corruptions, and race
conditions. Added runtime options for enabling and disabling
it. Enable it for the debug build.

Bug: 9986565
Bug: 12592026
Change-Id: I923742b87805ae839f1549d78d0d492733da6a58
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 6c9e6f2..65d4c441 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -15,6 +15,9 @@
  */
 
 #include "base/mutex-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "rosalloc.h"
@@ -749,21 +752,35 @@
   }
 }
 
-void RosAlloc::Run::Dump() {
-  size_t idx = size_bracket_idx_;
-  size_t num_slots = numOfSlots[idx];
-  size_t num_vec = RoundUp(num_slots, 32) / 32;
+std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
   std::string bit_map_str;
   for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = alloc_bit_map_[v];
+    uint32_t vec = bit_map_base[v];
     if (v != num_vec - 1) {
       bit_map_str.append(StringPrintf("%x-", vec));
     } else {
       bit_map_str.append(StringPrintf("%x", vec));
     }
   }
-  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
-            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+  return bit_map_str.c_str();
+}
+
+std::string RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::ostringstream stream;
+  stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
+         << "{ magic_num=" << static_cast<int>(magic_num_)
+         << " size_bracket_idx=" << idx
+         << " is_thread_local=" << static_cast<int>(is_thread_local_)
+         << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
+         << " top_slot_idx=" << top_slot_idx_
+         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
+         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
+         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
+         << " }" << std::endl;
+  return stream.str();
 }
 
 void* RosAlloc::Run::AllocSlot() {
@@ -849,7 +866,7 @@
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   bool changed = false;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
   bool is_all_free_after = true;
   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
     uint32_t tl_free_vec = *tl_free_vecp;
@@ -881,7 +898,7 @@
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  uint32_t* free_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
     uint32_t free_vec = *free_vecp;
     if (free_vec != 0) {
@@ -898,8 +915,8 @@
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
-  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
-  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
+  uint32_t* from_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
     uint32_t from_vec = *from_vecp;
     if (from_vec != 0) {
@@ -912,11 +929,11 @@
 
 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
   DCHECK_NE(is_thread_local_, 0);
-  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
-  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkFreeBitMap");
+  MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
@@ -975,6 +992,32 @@
   return true;
 }
 
+inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = BulkFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = ThreadLocalFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void RosAlloc::Run::ClearBitMaps() {
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
@@ -1196,8 +1239,10 @@
   }
 }
 
-void RosAlloc::DumpPageMap(Thread* self) {
-  MutexLock mu(self, lock_);
+std::string RosAlloc::DumpPageMap() {
+  std::ostringstream stream;
+  stream << "RosAlloc PageMap: " << std::endl;
+  lock_.AssertHeld(Thread::Current());
   size_t end = page_map_.size();
   FreePageRun* curr_fpr = NULL;
   size_t curr_fpr_size = 0;
@@ -1218,15 +1263,15 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
-                    << " fpr_size=" << curr_fpr_size
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR start)"
+                 << " fpr_size=" << curr_fpr_size
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
             curr_fpr_size = 0;
           }
-          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
         } else {
           // Still part of the current free page run.
@@ -1235,8 +1280,8 @@
           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
           remaining_curr_fpr_size -= kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR part)"
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
@@ -1249,36 +1294,38 @@
       case kPageMapLargeObject: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (start)";
+        stream << "[" << i << "]=Large (start)" << std::endl;
         break;
       }
       case kPageMapLargeObjectPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (part)";
+        stream << "[" << i << "]=Large (part)" << std::endl;
         break;
       case kPageMapRun: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
         size_t idx = run->size_bracket_idx_;
-        LOG(INFO) << "[" << i << "]=Run (start)"
-                  << " idx=" << idx
-                  << " numOfPages=" << numOfPages[idx]
-                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
-                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        stream << "[" << i << "]=Run (start)"
+               << " idx=" << idx
+               << " numOfPages=" << numOfPages[idx]
+               << " thread_local=" << static_cast<int>(run->is_thread_local_)
+               << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
+               << std::endl;
         break;
       }
       case kPageMapRunPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Run (part)";
+        stream << "[" << i << "]=Run (part)" << std::endl;
         break;
       default:
-        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        stream << "[" << i << "]=Unrecognizable page map type: " << pm;
         break;
     }
   }
+  return stream.str();
 }
 
 size_t RosAlloc::UsableSize(void* ptr) {
@@ -1631,6 +1678,223 @@
   ++(*objects_allocated);
 }
 
+void RosAlloc::Verify() {
+  Thread* self = Thread::Current();
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
+      << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  WriterMutexLock wmu(self, bulk_free_lock_);
+  std::vector<Run*> runs;
+  {
+    MutexLock mu(self, lock_);
+    size_t pm_end = page_map_.size();
+    size_t i = 0;
+    while (i < pm_end) {
+      byte pm = page_map_[i];
+      switch (pm) {
+        case kPageMapEmpty: {
+          // The start of a free page run.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(fpr->magic_num_ == kMagicNumFree) << "Bad magic number : " << fpr->magic_num_;
+          CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
+              << "An empty page must belong to the free page run set";
+          size_t fpr_size = fpr->ByteSize(this);
+          CHECK(IsAligned<kPageSize>(fpr_size))
+              << "A free page run size isn't page-aligned : " << fpr_size;
+          size_t num_pages = fpr_size / kPageSize;
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "A free page run size must be > 0 : " << fpr_size;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapEmpty)
+                << "A mismatch between the page map table for kPageMapEmpty "
+                << " at page index " << j
+                << " and the free page run size : page index range : "
+                << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
+          }
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObject: {
+          // The start of a large object.
+          size_t num_pages = 1;
+          size_t idx = i + 1;
+          while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+            num_pages++;
+            idx++;
+          }
+          void* start = base_ + i * kPageSize;
+          mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
+          size_t obj_size = obj->SizeOf();
+          CHECK(obj_size > kLargeSizeThreshold)
+              << "A rosalloc large object size must be > " << kLargeSizeThreshold;
+          CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
+              << "A rosalloc large object size " << obj_size
+              << " does not match the page map table " << (num_pages * kPageSize)
+              << std::endl << DumpPageMap();
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObjectPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        case kPageMapRun: {
+          // The start of a run.
+          Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum) << "Bad magic number" << run->magic_num_;
+          size_t idx = run->size_bracket_idx_;
+          CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
+          size_t num_pages = numOfPages[idx];
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "Run size must be > 0 : " << num_pages;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapRunPart)
+                << "A mismatch between the page map table for kPageMapRunPart "
+                << " at page index " << j
+                << " and the run size : page index range " << i << " to " << (i + num_pages)
+                << std::endl << DumpPageMap();
+          }
+          runs.push_back(run);
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapRunPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        default:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+      }
+    }
+  }
+
+  // Call Verify() here for the lock order.
+  for (auto& run : runs) {
+    run->Verify(self, this);
+  }
+}
+
+void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
+  DCHECK(magic_num_ == kMagicNum) << "Bad magic number : " << Dump();
+  size_t idx = size_bracket_idx_;
+  CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  CHECK_EQ(slot_base + num_slots * bracket_size,
+           reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
+      << "Mismatch in the end address of the run " << Dump();
+  // Check that the bulk free bitmap is clean. It's only used during BulkFree().
+  CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
+  // Check the bump index mode, if it's on.
+  if (top_slot_idx_ < num_slots) {
+    // If the bump index mode is on (top_slot_idx_ < num_slots), then
+    // all of the slots after the top index must be free.
+    for (size_t i = top_slot_idx_; i < num_slots; ++i) {
+      size_t vec_idx = i / 32;
+      size_t vec_off = i % 32;
+      uint32_t vec = alloc_bit_map_[vec_idx];
+      CHECK_EQ((vec & (1 << vec_off)), static_cast<uint32_t>(0))
+          << "A slot >= top_slot_idx_ isn't free " << Dump();
+    }
+  } else {
+    CHECK_EQ(top_slot_idx_, num_slots)
+        << "If the bump index mode is off, the top index == the number of slots "
+        << Dump();
+  }
+  // Check the thread local runs, the current runs, and the run sets.
+  if (is_thread_local_) {
+    // If it's a thread local run, then it must be pointed to by an owner thread.
+    bool owner_found = false;
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+      Thread* thread = *it;
+      for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+        MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+        Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[i]);
+        if (thread_local_run == this) {
+          CHECK(!owner_found)
+              << "A thread local run has more than one owner thread " << Dump();
+          CHECK_EQ(i, idx)
+              << "A mismatching size bracket index in a thread local run " << Dump();
+          owner_found = true;
+        }
+      }
+    }
+    CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
+  } else {
+    // If it's not thread local, check that the thread local free bitmap is clean.
+    CHECK(IsThreadLocalFreeBitmapClean())
+        << "A non-thread-local run's thread local free bitmap isn't clean "
+        << Dump();
+    // Check if it's a current run for the size bucket.
+    bool is_current_run = false;
+    for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+      MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+      Run* current_run = rosalloc->current_runs_[i];
+      if (idx == i) {
+        if (this == current_run) {
+          is_current_run = true;
+        }
+      } else {
+        // If the size bucket index does not match, then it must not
+        // be a current run.
+        CHECK_NE(this, current_run)
+            << "A current run points to a run with a wrong size bracket index " << Dump();
+      }
+    }
+    // If it's neither a thread local or current run, then it must be
+    // in a run set.
+    if (!is_current_run) {
+      MutexLock mu(self, rosalloc->lock_);
+      std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
+      // If it's all free, it must be a free page run rather than a run.
+      CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
+      if (!IsFull()) {
+        // If it's not full, it must in the non-full run set.
+        CHECK(non_full_runs.find(this) != non_full_runs.end())
+            << "A non-full run isn't in the non-full run set " << Dump();
+      } else {
+        // If it's full, it must in the full run set (debug build only.)
+        if (kIsDebugBuild) {
+          hash_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          CHECK(full_runs.find(this) != full_runs.end())
+              << " A full run isn't in the full run set " << Dump();
+        }
+      }
+    }
+  }
+  // Check each slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots) << "Out of bounds";
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      // If a thread local run, slots may be marked freed in the
+      // thread local free bitmap.
+      bool is_thread_local_freed = is_thread_local_ && ((thread_local_free_vec >> i) & 0x1) != 0;
+      if (is_allocated && !is_thread_local_freed) {
+        byte* slot_addr = slot_base + (slots + i) * bracket_size;
+        mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
+        size_t obj_size = obj->SizeOf();
+        CHECK_LE(obj_size, kLargeSizeThreshold)
+            << "A run slot contains a large object " << Dump();
+        CHECK_EQ(SizeToIndex(obj_size), idx)
+            << "A run slot contains an object with wrong size " << Dump();
+      }
+    }
+  }
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 7480975..c4238c7 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -212,11 +212,11 @@
       return size;
     }
     // Returns the base address of the free bit map.
-    uint32_t* bulk_free_bit_map() {
+    uint32_t* BulkFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
     }
     // Returns the base address of the thread local free bit map.
-    uint32_t* thread_local_free_bit_map() {
+    uint32_t* ThreadLocalFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
     }
     void* End() {
@@ -248,16 +248,26 @@
     bool IsAllFree();
     // Returns true if all the slots in the run are in use.
     bool IsFull();
+    // Returns true if the bulk free bit map is clean.
+    bool IsBulkFreeBitmapClean();
+    // Returns true if the thread local free bit map is clean.
+    bool IsThreadLocalFreeBitmapClean();
     // Clear all the bit maps.
     void ClearBitMaps();
     // Iterate over all the slots and apply the given function.
     void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
     // Dump the run metadata for debugging.
-    void Dump();
+    std::string Dump();
+    // Verify for debugging.
+    void Verify(Thread* self, RosAlloc* rosalloc)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
 
    private:
     // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap().
     void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+    // Turns the bit map into a string for debugging.
+    static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec);
   };
 
   // The magic number for a run.
@@ -531,7 +541,7 @@
   // Releases the thread-local runs assigned to all the threads back to the common set of runs.
   void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
   // Dumps the page map for debugging.
-  void DumpPageMap(Thread* self);
+  std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
@@ -541,6 +551,9 @@
   bool DoesReleaseAllPages() const {
     return page_release_mode_ == kPageReleaseModeAll;
   }
+
+  // Verify for debugging.
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace allocator
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index ae04074..094e274 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -88,14 +88,18 @@
     // Mutator lock may be already exclusively held when we do garbage collections for changing the
     // current collector / allocator during process state updates.
     if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
     } else {
       thread_list->SuspendAll();
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       thread_list->ResumeAll();
     }
     ATRACE_END();
@@ -114,10 +118,12 @@
       thread_list->SuspendAll();
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       done = HandleDirtyObjectsPhase();
       if (done) {
         GetHeap()->RevokeAllThreadLocalBuffers();
       }
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 03307f5..0c6a938 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -182,6 +182,7 @@
     }
   }
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
+
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b1bbfc6..f438ca0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -81,7 +81,8 @@
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-           bool verify_post_gc_heap)
+           bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+           bool verify_post_gc_rosalloc)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
       dlmalloc_space_(nullptr),
@@ -124,6 +125,8 @@
       verify_pre_gc_heap_(verify_pre_gc_heap),
       verify_post_gc_heap_(verify_post_gc_heap),
       verify_mod_union_table_(false),
+      verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
+      verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
       last_trim_time_ms_(0),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
@@ -1189,6 +1192,8 @@
   if (collector_type == collector_type_) {
     return;
   }
+  VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
+             << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
   uint32_t before_size  = GetTotalMemory();
   uint32_t before_allocated = num_bytes_allocated_.Load();
@@ -1216,6 +1221,7 @@
     usleep(1000);
   }
   tl->SuspendAll();
+  PreGcRosAllocVerification(&semi_space_collector_->GetTimings());
   switch (collector_type) {
     case kCollectorTypeSS:
       // Fall-through.
@@ -1265,6 +1271,7 @@
     }
   }
   ChangeCollector(collector_type);
+  PostGcRosAllocVerification(&semi_space_collector_->GetTimings());
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
   EnqueueClearedReferences();
@@ -1447,6 +1454,9 @@
   ChangeCollector(post_zygote_collector_type_);
   // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
   if (semi_space_collector_ != nullptr) {
+    // Temporarily disable rosalloc verification because the zygote
+    // compaction will mess up the rosalloc internal metadata.
+    ScopedDisableRosAllocVerification disable_rosalloc_verif(this);
     ZygoteCompactingCollector zygote_collector(this);
     zygote_collector.BuildBins(non_moving_space_);
     // Create a new bump pointer space which we will compact into.
@@ -2108,6 +2118,32 @@
   }
 }
 
+void Heap::PreGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_pre_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PreGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PreGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
+void Heap::PostGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_post_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PostGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PostGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
 collector::GcType Heap::WaitForGcToComplete(Thread* self) {
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 499d27c..f35ff4f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -150,7 +150,8 @@
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-                bool verify_post_gc_heap);
+                bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+                bool verify_post_gc_rosalloc);
 
   ~Heap();
 
@@ -440,6 +441,11 @@
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
 
+  void PreGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PostGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
@@ -796,6 +802,29 @@
   const bool verify_pre_gc_heap_;
   const bool verify_post_gc_heap_;
   const bool verify_mod_union_table_;
+  bool verify_pre_gc_rosalloc_;
+  bool verify_post_gc_rosalloc_;
+
+  // RAII that temporarily disables the rosalloc verification during
+  // the zygote fork.
+  class ScopedDisableRosAllocVerification {
+   private:
+    Heap* heap_;
+    bool orig_verify_pre_gc_;
+    bool orig_verify_post_gc_;
+   public:
+    explicit ScopedDisableRosAllocVerification(Heap* heap)
+        : heap_(heap),
+          orig_verify_pre_gc_(heap_->verify_pre_gc_rosalloc_),
+          orig_verify_post_gc_(heap_->verify_post_gc_rosalloc_) {
+      heap_->verify_pre_gc_rosalloc_ = false;
+      heap_->verify_post_gc_rosalloc_ = false;
+    }
+    ~ScopedDisableRosAllocVerification() {
+      heap_->verify_pre_gc_rosalloc_ = orig_verify_pre_gc_;
+      heap_->verify_post_gc_rosalloc_ = orig_verify_post_gc_;
+    }
+  };
 
   // Parallel GC data structures.
   UniquePtr<ThreadPool> thread_pool_;
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 4cd5a6d..2377423 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -104,6 +104,10 @@
     return this;
   }
 
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    rosalloc_->Verify();
+  }
+
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
                 byte* begin, byte* end, byte* limit, size_t growth_limit);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4e90478..17afb43 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -429,6 +429,8 @@
   parsed->use_tlab_ = false;
   parsed->verify_pre_gc_heap_ = false;
   parsed->verify_post_gc_heap_ = kIsDebugBuild;
+  parsed->verify_pre_gc_rosalloc_ = kIsDebugBuild;
+  parsed->verify_post_gc_rosalloc_ = false;
 
   parsed->compiler_callbacks_ = nullptr;
   parsed->is_zygote_ = false;
@@ -615,12 +617,20 @@
           parsed->collector_type_ = collector_type;
         } else if (gc_option == "preverify") {
           parsed->verify_pre_gc_heap_ = true;
-        }  else if (gc_option == "nopreverify") {
+        } else if (gc_option == "nopreverify") {
           parsed->verify_pre_gc_heap_ = false;
         }  else if (gc_option == "postverify") {
           parsed->verify_post_gc_heap_ = true;
         } else if (gc_option == "nopostverify") {
           parsed->verify_post_gc_heap_ = false;
+        } else if (gc_option == "preverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = true;
+        } else if (gc_option == "nopreverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = false;
+        } else if (gc_option == "postverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = true;
+        } else if (gc_option == "nopostverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = false;
         } else {
           LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_option;
         }
@@ -1018,7 +1028,9 @@
                        options->ignore_max_footprint_,
                        options->use_tlab_,
                        options->verify_pre_gc_heap_,
-                       options->verify_post_gc_heap_);
+                       options->verify_post_gc_heap_,
+                       options->verify_pre_gc_rosalloc_,
+                       options->verify_post_gc_rosalloc_);
 
   dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 557ba2c..896a18b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -109,6 +109,8 @@
     bool use_tlab_;
     bool verify_pre_gc_heap_;
     bool verify_post_gc_heap_;
+    bool verify_pre_gc_rosalloc_;
+    bool verify_post_gc_rosalloc_;
     size_t long_pause_log_threshold_;
     size_t long_gc_log_threshold_;
     bool dump_gc_performance_on_shutdown_;