Additional heap verification for the GC

There are now two verification booleans that can be enabled, one to verify the heap before each GC and one to verify it after. When enabled, the verification checks that each live object references only other live objects.
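
As a rough, self-contained sketch of the idea (hypothetical SimpleHeap/Obj types, not the classes touched by this change): walk every object currently considered live, visit each reference it holds, and report any referent that is not itself live.

#include <cstddef>
#include <cstdio>
#include <set>
#include <vector>

struct Obj {
  std::vector<const Obj*> refs;  // outgoing references held by this object
};

class SimpleHeap {
 public:
  void AddLive(const Obj* obj) { live_.insert(obj); }
  bool IsLive(const Obj* obj) const { return live_.count(obj) != 0; }

  // Returns true if every live object references only other live objects.
  bool VerifyReferences() const {
    bool ok = true;
    for (std::set<const Obj*>::const_iterator it = live_.begin(); it != live_.end(); ++it) {
      const Obj* obj = *it;
      for (size_t i = 0; i < obj->refs.size(); ++i) {
        const Obj* ref = obj->refs[i];
        if (ref != NULL && !IsLive(ref)) {
          std::printf("object %p references dead object %p\n",
                      static_cast<const void*>(obj), static_cast<const void*>(ref));
          ok = false;
        }
      }
    }
    return ok;
  }

 private:
  std::set<const Obj*> live_;
};

In the change itself, liveness is judged against the space live bitmaps plus the allocation stack, and a failure additionally dumps card table, mark stack, and root diagnostics before aborting.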

Changed SetClass to use SetFieldPtr to avoid an extra card mark. This is safe since all classes are held live by the class linker.
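
As a rough illustration of the trade-off (hypothetical names and simplified signatures, not the actual Object/Heap API): a normal reference store dirties the card covering the writing object so a later card scan revisits it, while a raw pointer store skips the card mark and is only safe when the referent stays reachable through some other root; here, classes are rooted by the class linker.

#include <stddef.h>
#include <stdint.h>

static const size_t kCardSize = 128;           // assumed card granularity
static const size_t kCardTableSize = 1 << 20;  // toy table size; index is wrapped below
static uint8_t card_table[kCardTableSize];

static void MarkCard(const void* addr) {
  // Analogue of setting GC_CARD_DIRTY for the card covering 'addr'.
  card_table[(reinterpret_cast<uintptr_t>(addr) / kCardSize) % kCardTableSize] = 1;
}

struct Object {
  Object* klass_;

  // Reference store with a write barrier: dirty the card so a later scan revisits this object.
  void SetFieldObject(Object** field, Object* new_value) {
    *field = new_value;
    MarkCard(this);
  }

  // Raw store with no card mark. Only safe when new_value is guaranteed to stay reachable
  // through some other root, as classes are via the class linker.
  void SetFieldPtr(Object** field, Object* new_value) {
    *field = new_value;
  }

  void SetClass(Object* new_klass) {
    SetFieldPtr(&klass_, new_klass);  // the change described above: no card mark needed
  }
};

The verification above is also what would catch a mistake here: if a class were not actually rooted elsewhere, the missing card mark could leave a live object pointing at a reclaimed class.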

Change-Id: I005bb59e5cc8153a79d3ccb3d7b5cabd29fb4051
diff --git a/build/Android.common.mk b/build/Android.common.mk
index d02b998..7a6495a 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -254,7 +254,7 @@
 
 ifeq ($(ART_USE_GREENLAND_COMPILER),true)
 LIBART_COMMON_SRC_FILES += \
-       src/greenland/inferred_reg_category_map.cc
+	src/greenland/inferred_reg_category_map.cc
 endif
 
 LIBART_COMMON_SRC_FILES += \
@@ -344,6 +344,7 @@
 
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
+	src/heap.h \
 	src/indirect_reference_table.h \
 	src/instruction_set.h \
 	src/invoke_type.h \
diff --git a/src/card_table.h b/src/card_table.h
index 66357c9..a6284e3 100644
--- a/src/card_table.h
+++ b/src/card_table.h
@@ -90,8 +90,7 @@
       }
       byte* run_start = card_cur++;
 
-      // Guaranteed to have at least one clean card at the end of the array.
-      while (*card_cur == GC_CARD_DIRTY) {
+      while (card_cur < card_end && *card_cur == GC_CARD_DIRTY) {
         card_cur++;
       }
       byte* run_end = card_cur;
diff --git a/src/debugger.cc b/src/debugger.cc
index b3c4844..b47377e 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -3086,9 +3086,8 @@
     UNIMPLEMENTED(WARNING) << "Native heap send heap segments";
   } else {
     Heap* heap = Runtime::Current()->GetHeap();
-    typedef std::vector<Space*> SpaceVec;
-    const SpaceVec& spaces = heap->GetSpaces();
-    for (SpaceVec::const_iterator cur = spaces.begin(); cur != spaces.end(); ++cur) {
+    const Spaces& spaces = heap->GetSpaces();
+    for (Spaces::const_iterator cur = spaces.begin(); cur != spaces.end(); ++cur) {
       if ((*cur)->IsAllocSpace()) {
         ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
         (*cur)->AsAllocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
diff --git a/src/heap.cc b/src/heap.cc
index 1626e72..47473e3 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -148,6 +148,9 @@
       sticky_gc_count_(0),
       num_bytes_allocated_(0),
       num_objects_allocated_(0),
+      pre_gc_verify_heap_(false),
+      post_gc_verify_heap_(false),
+      verify_mod_union_table_(false),
       last_trim_time_(0),
       try_running_gc_(false),
       requesting_gc_(false),
@@ -206,7 +209,7 @@
     CHECK(oat_end_addr > GetImageSpace()->End());
     if (oat_end_addr > requested_begin) {
       requested_begin = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(oat_end_addr),
-                                                        kPageSize));
+                                                          kPageSize));
     }
   }
 
@@ -253,7 +256,7 @@
   // It's still too early to take a lock because there are no threads yet,
   // but we can create the heap lock now. We don't create it earlier to
   // make it clear that you can't use locks during heap initialization.
-  gc_complete_lock_ =  new Mutex("GC complete lock");
+  gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable"));
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -264,7 +267,7 @@
 // Sort spaces based on begin address
 class SpaceSorter {
  public:
-  bool operator () (const Space* a, const Space* b) const {
+  bool operator ()(const Space* a, const Space* b) const {
     return a->Begin() < b->Begin();
   }
 };
@@ -292,7 +295,7 @@
     if (space->IsImageSpace()) {
       DCHECK(!seen_zygote);
       DCHECK(!seen_alloc);
-    } if (space->IsZygoteSpace()) {
+    } else if (space->IsZygoteSpace()) {
       DCHECK(!seen_alloc);
       seen_zygote = true;
     } else if (space->IsAllocSpace()) {
@@ -318,9 +321,9 @@
 
 Space* Heap::FindSpaceFromObject(const Object* obj) const {
   // TODO: C++0x auto
-  for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
-    if ((*cur)->Contains(obj)) {
-      return *cur;
+  for (Spaces::const_iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+    if ((*it)->Contains(obj)) {
+      return *it;
     }
   }
   LOG(FATAL) << "object " << reinterpret_cast<const void*>(obj) << " not inside any spaces!";
@@ -329,9 +332,9 @@
 
 ImageSpace* Heap::GetImageSpace() {
   // TODO: C++0x auto
-  for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
-    if ((*cur)->IsImageSpace()) {
-      return (*cur)->AsImageSpace();
+  for (Spaces::const_iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+    if ((*it)->IsImageSpace()) {
+      return (*it)->AsImageSpace();
     }
   }
   return NULL;
@@ -393,16 +396,14 @@
   int64_t total_bytes_free = GetFreeMemory();
   size_t max_contiguous_allocation = 0;
   // TODO: C++0x auto
-  for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
-    if ((*cur)->IsAllocSpace()) {
-      (*cur)->AsAllocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+  for (Spaces::const_iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+    if ((*it)->IsAllocSpace()) {
+      (*it)->AsAllocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
     }
   }
 
   std::string msg(StringPrintf("Failed to allocate a %zd-byte %s (%lld total bytes free; largest possible contiguous allocation %zd bytes)",
-                               byte_count,
-                               PrettyDescriptor(c).c_str(),
-                               total_bytes_free, max_contiguous_allocation));
+                               byte_count, PrettyDescriptor(c).c_str(), total_bytes_free, max_contiguous_allocation));
   Thread::Current()->ThrowOutOfMemoryError(msg.c_str());
   return NULL;
 }
@@ -493,28 +494,26 @@
 }
 
 void Heap::RecordAllocation(AllocSpace* space, const Object* obj) {
-  {
-    size_t size = space->AllocationSize(obj);
-    DCHECK_GT(size, 0u);
-    COMPILE_ASSERT(sizeof(size_t) == sizeof(int32_t),
-                   int32_t_must_be_same_size_as_size_t_for_used_atomic_operations);
-    android_atomic_add(size, reinterpret_cast<volatile int32_t*>(
-        reinterpret_cast<size_t>(&num_bytes_allocated_)));
-    android_atomic_add(1, reinterpret_cast<volatile int32_t*>(
-        reinterpret_cast<size_t>(&num_objects_allocated_)));
+  DCHECK(obj != NULL);
 
-    if (Runtime::Current()->HasStatsEnabled()) {
-      RuntimeStats* global_stats = Runtime::Current()->GetStats();
-      RuntimeStats* thread_stats = Thread::Current()->GetStats();
-      ++global_stats->allocated_objects;
-      ++thread_stats->allocated_objects;
-      global_stats->allocated_bytes += size;
-      thread_stats->allocated_bytes += size;
-    }
+  size_t size = space->AllocationSize(obj);
+  DCHECK_GT(size, 0u);
+  COMPILE_ASSERT(sizeof(size_t) == sizeof(int32_t),
+                 int32_t_must_be_same_size_as_size_t_for_used_atomic_operations);
+  android_atomic_add(
+      size, reinterpret_cast<volatile int32_t*>(reinterpret_cast<size_t>(&num_bytes_allocated_)));
+  android_atomic_add(
+      1, reinterpret_cast<volatile int32_t*>(reinterpret_cast<size_t>(&num_objects_allocated_)));
+
+  if (Runtime::Current()->HasStatsEnabled()) {
+    RuntimeStats* global_stats = Runtime::Current()->GetStats();
+    RuntimeStats* thread_stats = Thread::Current()->GetStats();
+    ++global_stats->allocated_objects;
+    ++thread_stats->allocated_objects;
+    global_stats->allocated_bytes += size;
+    thread_stats->allocated_bytes += size;
   }
 
-  DCHECK(obj);
-
   allocation_stack_->AtomicPush(obj);
 }
 
@@ -567,7 +566,24 @@
       ++Thread::Current()->GetStats()->gc_for_alloc_count;
     }
     self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
-    CollectGarbageInternal(have_zygote_space_ ? GC_PARTIAL : GC_FULL, false);
+    const size_t alloc_space_size = alloc_space_->Size();
+    if (alloc_space_size > kMinAllocSpaceSizeForStickyGC
+        && alloc_space_->Capacity() - alloc_space_size >= kMinRemainingSpaceForStickyGC) {
+      CollectGarbageInternal(GC_STICKY, false);
+    } else {
+      CollectGarbageInternal(have_zygote_space_ ? GC_PARTIAL : GC_FULL, false);
+    }
+    self->TransitionFromSuspendedToRunnable();
+  } else if (have_zygote_space_) {
+    // TODO: Keep track of what kind of Gc we waited to complete and use this to figure out what
+    // Gc to do.
+    // Try a partial Gc.
+    if (Runtime::Current()->HasStatsEnabled()) {
+      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+      ++Thread::Current()->GetStats()->gc_for_alloc_count;
+    }
+    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+    CollectGarbageInternal(GC_PARTIAL, false);
     self->TransitionFromSuspendedToRunnable();
   }
 
@@ -576,40 +592,17 @@
     return ptr;
   }
 
-  const size_t alloc_space_size = alloc_space_->Size();
-  if (alloc_space_size > kMinAllocSpaceSizeForStickyGC &&
-      alloc_space_->Capacity() - alloc_space_size < kMinRemainingSpaceForStickyGC) {
-    // Partial GC didn't free enough memory, try a full GC.
-    if (Runtime::Current()->HasStatsEnabled()) {
-      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
-      ++Thread::Current()->GetStats()->gc_for_alloc_count;
-    }
-
-    // Don't bother trying a young GC unless we have a few MB AllocSpace.
-    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
-    CollectGarbageInternal(GC_STICKY, false);
-    self->TransitionFromSuspendedToRunnable();
-
-    ptr = space->AllocWithoutGrowth(alloc_size);
-    if (ptr != NULL) {
-      return ptr;
-    }
+  // The previous GC didn't free enough memory, try a full GC.
+  if (Runtime::Current()->HasStatsEnabled()) {
+    ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+    ++Thread::Current()->GetStats()->gc_for_alloc_count;
   }
-
-  if (!have_zygote_space_) {
-    // Partial GC didn't free enough memory, try a full GC.
-    if (Runtime::Current()->HasStatsEnabled()) {
-      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
-      ++Thread::Current()->GetStats()->gc_for_alloc_count;
-    }
-    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
-    CollectGarbageInternal(GC_PARTIAL, false);
-    self->TransitionFromSuspendedToRunnable();
-
-    ptr = space->AllocWithoutGrowth(alloc_size);
-    if (ptr != NULL) {
-      return ptr;
-    }
+  self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+  CollectGarbageInternal(GC_FULL, false);
+  self->TransitionFromSuspendedToRunnable();
+  ptr = space->AllocWithoutGrowth(alloc_size);
+  if (ptr != NULL) {
+    return ptr;
   }
 
   // Allocations have failed after GCs;  this is an exceptional state.
@@ -630,7 +623,8 @@
   // VM spec requires that all SoftReferences have been collected and cleared before throwing OOME.
 
   // OLD-TODO: wait for the finalizers from the previous GC to finish
-  VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size) << " allocation";
+  VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
+           << " allocation";
 
   if (Runtime::Current()->HasStatsEnabled()) {
     ++Runtime::Current()->GetStats()->gc_for_alloc_count;
@@ -668,6 +662,7 @@
   InstanceCounter(Class* c, bool count_assignable)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : class_(c), count_assignable_(count_assignable), count_(0) {
+
   }
 
   size_t GetCount() {
@@ -787,7 +782,7 @@
 
   // Clear all of the things in the AllocStack.
   size_t count = alloc_stack->Size();
-  for (size_t i = 0;i < count;++i) {
+  for (size_t i = 0; i < count; ++i) {
     const Object* obj = alloc_stack->Get(i);
     DCHECK(obj != NULL);
     if (mark_bitmap->Test(obj)) {
@@ -796,6 +791,20 @@
   }
 }
 
+void Heap::UnMarkStackAsLive(MarkStack* alloc_stack) {
+  SpaceBitmap* live_bitmap = alloc_space_->GetLiveBitmap();
+
+  // Clear all of the things in the AllocStack.
+  size_t count = alloc_stack->Size();
+  for (size_t i = 0; i < count; ++i) {
+    const Object* obj = alloc_stack->Get(i);
+    DCHECK(obj != NULL);
+    if (live_bitmap->Test(obj)) {
+      live_bitmap->Clear(obj);
+    }
+  }
+}
+
 void Heap::CollectGarbageInternal(GcType gc_type, bool clear_soft_references) {
   GlobalSynchronization::mutator_lock_->AssertNotHeld();
 #ifndef NDEBUG
@@ -851,6 +860,9 @@
 void Heap::CollectGarbageMarkSweepPlan(GcType gc_type, bool clear_soft_references) {
   TimingLogger timings("CollectGarbageInternal", true);
 
+  std::stringstream gc_type_str;
+  gc_type_str << gc_type << " ";
+
   // Suspend all threads are get exclusive access to the heap.
   uint64_t start_time = NanoTime();
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
@@ -866,6 +878,13 @@
     mark_sweep.Init();
     timings.AddSplit("Init");
 
+    // Pre verify the heap
+    if (pre_gc_verify_heap_) {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      VerifyHeapReferences(std::string("Pre ") + gc_type_str.str() + "Gc");
+      timings.AddSplit("VerifyHeapReferencesPreGC");
+    }
+
     // Make sure that the tables have the correct pointer for the mark sweep.
     mod_union_table_->Init(&mark_sweep);
     zygote_mod_union_table_->Init(&mark_sweep);
@@ -899,17 +918,12 @@
       }
     }
 
-#if VERIFY_MOD_UNION
-    mod_union_table_->Verify();
-    zygote_mod_union_table_->Verify();
-#endif
-
     WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
     if (gc_type == GC_PARTIAL) {
       // Copy the mark bits over from the live bits, do this as early as possible or else we can
       // accidentally un-mark roots.
       // Needed for scanning dirty objects.
-      for (Spaces::iterator it = spaces_.begin();it != spaces_.end(); ++it) {
+      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
         if ((*it)->GetGcRetentionPolicy() == GCRP_FULL_COLLECT) {
           mark_sweep.CopyMarkBits(*it);
         }
@@ -919,42 +933,36 @@
       // We can assume that everything < alloc_space_ start is marked at this point.
       mark_sweep.SetCondemned(reinterpret_cast<Object*>(alloc_space_->Begin()));
     } else if (gc_type == GC_STICKY) {
-      for (Spaces::iterator it = spaces_.begin();it != spaces_.end(); ++it) {
+      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
         if ((*it)->GetGcRetentionPolicy() != GCRP_NEVER_COLLECT) {
           mark_sweep.CopyMarkBits(*it);
         }
       }
       timings.AddSplit("CopyMarkBits");
 
-      if (VERIFY_OBJECT_ENABLED) {
-        UnMarkStack(live_stack_.get());
-      }
-
       mark_sweep.SetCondemned(reinterpret_cast<Object*>(alloc_space_->Begin()));
     }
 
     MarkStackAsLive(live_stack_.get());
 
+    if (gc_type != GC_STICKY) {
+      live_stack_->Reset();
+    }
+
     mark_sweep.MarkRoots();
     timings.AddSplit("MarkRoots");
 
     // Roots are marked on the bitmap and the mark_stack is empty.
     DCHECK(mark_sweep.IsMarkStackEmpty());
 
-    // Update zygote mod union table.
-    zygote_mod_union_table_->Update();
-    timings.AddSplit("UpdateZygoteModUnionTable");
+    UpdateAndMarkModUnion(timings, gc_type);
 
-    zygote_mod_union_table_->MarkReferences();
-    timings.AddSplit("ZygoteMarkReferences");
-
-    // Processes the cards we cleared earlier and adds their objects into the mod-union table.
-    mod_union_table_->Update();
-    timings.AddSplit("UpdateModUnionTable");
-
-    // Scans all objects in the mod-union table.
-    mod_union_table_->MarkReferences();
-    timings.AddSplit("MarkImageToAllocSpaceReferences");
+    if (verify_mod_union_table_) {
+      zygote_mod_union_table_->Update();
+      zygote_mod_union_table_->Verify();
+      mod_union_table_->Update();
+      mod_union_table_->Verify();
+    }
 
     // Recursively mark all the non-image bits set in the mark bitmap.
     if (gc_type != GC_STICKY) {
@@ -963,6 +971,7 @@
     } else {
       mark_sweep.RecursiveMarkCards(card_table_.get(), dirty_cards, timings);
     }
+    mark_sweep.DisableFinger();
 
     // Need to process references the swap since it uses IsMarked.
     mark_sweep.ProcessReferences(clear_soft_references);
@@ -975,21 +984,8 @@
     // Need to swap for VERIFY_OBJECT_ENABLED since we put things in the live bitmap after they
     // have been allocated.
     const bool swap = true;
-
     if (swap) {
-      // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
-      // these bitmaps. Doing this enables us to sweep with the heap unlocked since new allocations
-      // set the live bit, but since we have the bitmaps reversed at this point, this sets the mark bit
-      // instead, resulting in no new allocated objects being incorrectly freed by sweep.
-      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
-        Space* space = *it;
-        // We only allocate into AllocSpace, so we only need to swap AllocSpaces.
-        if (space->GetGcRetentionPolicy() == GCRP_ALWAYS_COLLECT) {
-          live_bitmap_->ReplaceBitmap(space->GetLiveBitmap(), space->GetMarkBitmap());
-          mark_bitmap_->ReplaceBitmap(space->GetMarkBitmap(), space->GetLiveBitmap());
-          space->AsAllocSpace()->SwapBitmaps();
-        }
-      }
+      SwapBitmaps();
     }
 
 #ifndef NDEBUG
@@ -1009,6 +1005,13 @@
     bytes_freed = mark_sweep.GetFreedBytes();
   }
 
+  // Post gc verify the heap
+  if (post_gc_verify_heap_) {
+    WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+    VerifyHeapReferences(std::string("Post ") + gc_type_str.str() + "Gc");
+    timings.AddSplit("VerifyHeapReferencesPostGC");
+  }
+
   GrowForUtilization();
   timings.AddSplit("GrowForUtilization");
 
@@ -1025,7 +1028,7 @@
     const size_t percent_free = GetPercentFree();
     const size_t current_heap_size = GetUsedMemorySize();
     const size_t total_memory = GetTotalMemory();
-    LOG(INFO) << (gc_type == GC_PARTIAL ? "Partial " : (gc_type == GC_STICKY ? "Sticky " : ""))
+    LOG(INFO) << gc_type_str.str()
               << "GC freed " << PrettySize(bytes_freed) << ", " << percent_free << "% free, "
               << PrettySize(current_heap_size) << "/" << PrettySize(total_memory) << ", "
               << "paused " << PrettyDuration(duration);
@@ -1036,9 +1039,204 @@
   }
 }
 
+void Heap::UpdateAndMarkModUnion(TimingLogger& timings, GcType gc_type) {
+  if (gc_type == GC_STICKY) {
+    // Don't need to do anything for mod union table in this case since we are only scanning dirty
+    // cards.
+    return;
+  }
+
+  // Update zygote mod union table.
+  if (gc_type == GC_PARTIAL) {
+    zygote_mod_union_table_->Update();
+    timings.AddSplit("UpdateZygoteModUnionTable");
+
+    zygote_mod_union_table_->MarkReferences();
+    timings.AddSplit("ZygoteMarkReferences");
+  }
+
+  // Processes the cards we cleared earlier and adds their objects into the mod-union table.
+  mod_union_table_->Update();
+  timings.AddSplit("UpdateModUnionTable");
+
+  // Scans all objects in the mod-union table.
+  mod_union_table_->MarkReferences();
+  timings.AddSplit("MarkImageToAllocSpaceReferences");
+}
+
+void Heap::RootMatchesObjectVisitor(const Object* root, void* arg) {
+  Object* obj = reinterpret_cast<Object*>(arg);
+  if (root == obj) {
+    LOG(INFO) << "Object " << obj << " is a root";
+  }
+}
+
+class ScanVisitor {
+ public:
+  void operator ()(const Object* obj) const {
+    LOG(INFO) << "Would have rescanned object " << obj;
+  }
+};
+
+class VerifyReferenceVisitor {
+ public:
+  VerifyReferenceVisitor(Heap* heap, bool* failed)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_,
+                            GlobalSynchronization::heap_bitmap_lock_)
+      : heap_(heap),
+        failed_(failed) {
+  }
+
+  // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for smarter
+  // analysis.
+  void operator ()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
+                     bool /* is_static */) const NO_THREAD_SAFETY_ANALYSIS {
+    // Verify that the reference is live.
+    if (ref != NULL && !IsLive(ref)) {
+      CardTable* card_table = heap_->GetCardTable();
+      MarkStack* alloc_stack = heap_->allocation_stack_.get();
+      MarkStack* live_stack = heap_->live_stack_.get();
+
+      // Print the cards around our object
+      byte* card_addr = card_table->CardFromAddr(obj);
+      LOG(INFO) << "Object " << obj << " references dead object " << ref << " on IsDirty = "
+                << (*card_addr == GC_CARD_DIRTY);
+      void* cover_begin = card_table->AddrFromCard(card_addr);
+      void* cover_end = reinterpret_cast<void*>(reinterpret_cast<size_t>(cover_begin) +
+          GC_CARD_SIZE);
+      LOG(INFO) << "Card " << reinterpret_cast<void*>(card_addr) << " covers " << cover_begin
+                << "-" << cover_end;
+      SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetSpaceBitmap(obj);
+
+      // Print out how the object is live.
+      if (bitmap->Test(obj)) {
+        LOG(INFO) << "Object " << obj << " found in live bitmap";
+      } else if (std::binary_search(alloc_stack->Begin(), alloc_stack->End(), obj)) {
+        LOG(INFO) << "Object " << obj << " found in allocation stack";
+      }
+
+      if (std::binary_search(live_stack->Begin(), live_stack->End(), ref)) {
+        LOG(INFO) << "Reference " << ref << " found in live stack!";
+      }
+
+      // Attempt to see if the card table missed the reference.
+      ScanVisitor scan_visitor;
+      byte* byte_cover_begin = reinterpret_cast<byte*>(card_table->AddrFromCard(card_addr));
+      card_table->Scan(bitmap, byte_cover_begin, byte_cover_begin + GC_CARD_SIZE, scan_visitor,
+          IdentityFunctor());
+
+      // Try and see if a mark sweep collector scans the reference.
+      MarkStack* mark_stack = heap_->mark_stack_.get();
+      MarkSweep ms(mark_stack);
+      ms.Init();
+      mark_stack->Reset();
+      ms.SetFinger(reinterpret_cast<Object*>(~size_t(0)));
+      // All the references should end up in the mark stack.
+      ms.ScanRoot(obj);
+      if (std::find(mark_stack->Begin(), mark_stack->End(), ref) != mark_stack->End()) {
+        LOG(INFO) << "Ref found in the mark_stack when rescanning the object!";
+      } else {
+        LOG(INFO) << "Dumping mark stack contents";
+        for (Object** it = mark_stack->Begin(); it != mark_stack->End(); ++it) {
+          LOG(INFO) << *it;
+        }
+      }
+      mark_stack->Reset();
+
+      // Search to see if any of the roots reference our object.
+      void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
+      Runtime::Current()->VisitRoots(&Heap::RootMatchesObjectVisitor, arg);
+      *failed_ = true;
+    }
+  }
+
+  bool IsLive(const Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetSpaceBitmap(obj);
+    if (bitmap != NULL) {
+      if (bitmap->Test(obj)) {
+        return true;
+      }
+    } else {
+      heap_->DumpSpaces();
+      LOG(FATAL) << "Object " << obj << " not found in any spaces";
+    }
+    MarkStack* alloc_stack = heap_->allocation_stack_.get();
+    // At this point we need to search the allocation stack, since things in the live stack may get swept.
+    if (std::binary_search(alloc_stack->Begin(), alloc_stack->End(), const_cast<Object*>(obj))) {
+      return true;
+    }
+    // Not in either the live bitmap or the allocation stack, so the object must be dead.
+    return false;
+  }
+
+ private:
+  Heap* heap_;
+  bool* failed_;
+};
+
+class VerifyObjectVisitor {
+ public:
+  VerifyObjectVisitor(Heap* heap)
+      : heap_(heap),
+        failed_(false) {
+
+  }
+
+  void operator ()(const Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_,
+          GlobalSynchronization::heap_bitmap_lock_) {
+    VerifyReferenceVisitor visitor(heap_, const_cast<bool*>(&failed_));
+    MarkSweep::VisitObjectReferences(obj, visitor);
+  }
+
+  bool Failed() const {
+    return failed_;
+  }
+
+ private:
+  Heap* heap_;
+  bool failed_;
+};
+
+// Must do this with mutators suspended since we are directly accessing the allocation stacks.
+void Heap::VerifyHeapReferences(const std::string& phase) {
+  GlobalSynchronization::mutator_lock_->AssertExclusiveHeld();
+  // Let's sort our allocation stacks so that we can efficiently binary search them.
+  std::sort(allocation_stack_->Begin(), allocation_stack_->End());
+  std::sort(live_stack_->Begin(), live_stack_->End());
+  // Perform the verification.
+  VerifyObjectVisitor visitor(this);
+  GetLiveBitmap()->Visit(visitor);
+  // We don't want to verify the objects in the allocation stack since they themselves may be
+  // pointing to dead objects if they are not reachable.
+  if (visitor.Failed()) {
+    DumpSpaces();
+    LOG(FATAL) << phase << " heap verification failed";
+  }
+}
+
+void Heap::SwapBitmaps() {
+  // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
+  // these bitmaps. Doing this enables us to sweep with the heap unlocked since new allocations
+  // set the live bit, but since we have the bitmaps reversed at this point, this sets the mark bit
+  // instead, resulting in no new allocated objects being incorrectly freed by sweep.
+  WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+  for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+    Space* space = *it;
+    // We never allocate into zygote spaces.
+    if (space->GetGcRetentionPolicy() != GCRP_NEVER_COLLECT) {
+      live_bitmap_->ReplaceBitmap(space->GetLiveBitmap(), space->GetMarkBitmap());
+      mark_bitmap_->ReplaceBitmap(space->GetMarkBitmap(), space->GetLiveBitmap());
+      space->AsAllocSpace()->SwapBitmaps();
+    }
+  }
+}
+
 void Heap::CollectGarbageConcurrentMarkSweepPlan(GcType gc_type, bool clear_soft_references) {
   TimingLogger timings("ConcurrentCollectGarbageInternal", true);
   uint64_t root_begin = NanoTime(), root_end = 0, dirty_begin = 0, dirty_end = 0;
+  std::stringstream gc_type_str;
+  gc_type_str << gc_type << " ";
 
   // Suspend all threads are get exclusive access to the heap.
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
@@ -1055,6 +1253,13 @@
     mark_sweep.Init();
     timings.AddSplit("Init");
 
+    // Pre verify the heap
+    if (pre_gc_verify_heap_) {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      VerifyHeapReferences(std::string("Pre ") + gc_type_str.str() + "Gc");
+      timings.AddSplit("VerifyHeapReferencesPreGC");
+    }
+
     // Swap the stacks, this is safe sunce all the mutators are suspended at this point.
     MarkStack* temp = allocation_stack_.release();
     allocation_stack_.reset(live_stack_.release());
@@ -1088,12 +1293,6 @@
       }
     }
 
-#if VERIFY_MOD_UNION
-    mod_union_table_->Verify();
-    zygote_mod_union_table_->Verify();
-#endif
-
-
     {
       WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
 
@@ -1101,7 +1300,7 @@
         // Copy the mark bits over from the live bits, do this as early as possible or else we can
         // accidentally un-mark roots.
         // Needed for scanning dirty objects.
-        for (Spaces::iterator it = spaces_.begin();it != spaces_.end(); ++it) {
+        for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
           if ((*it)->GetGcRetentionPolicy() == GCRP_FULL_COLLECT) {
             mark_sweep.CopyMarkBits(*it);
           }
@@ -1109,28 +1308,33 @@
         timings.AddSplit("CopyMarkBits");
         mark_sweep.SetCondemned(reinterpret_cast<Object*>(alloc_space_->Begin()));
       } else if (gc_type == GC_STICKY) {
-        for (Spaces::iterator it = spaces_.begin();it != spaces_.end(); ++it) {
+        for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
           if ((*it)->GetGcRetentionPolicy() != GCRP_NEVER_COLLECT) {
             mark_sweep.CopyMarkBits(*it);
           }
         }
         timings.AddSplit("CopyMarkBits");
-        // We need to unmark the new objects since we marked them as live earlier to avoid verify
-        // objects failing.
-        if (VERIFY_OBJECT_ENABLED) {
-          UnMarkStack(live_stack_.get());
-        }
         mark_sweep.SetCondemned(reinterpret_cast<Object*>(alloc_space_->Begin()));
       }
 
-      // TODO: Investigate whether or not this is really necessary for sticky mark bits.
+      // Mark everything as live so that sweeping system weaks works correctly for sticky mark bit
+      // GCs.
       MarkStackAsLive(live_stack_.get());
+      timings.AddSplit("MarkStackAsLive");
 
+      // TODO: Investigate whether or not this is really necessary for sticky mark bits.
       if (gc_type != GC_STICKY) {
         live_stack_->Reset();
         mark_sweep.MarkRoots();
         timings.AddSplit("MarkRoots");
       }
+
+      if (verify_mod_union_table_) {
+        zygote_mod_union_table_->Update();
+        zygote_mod_union_table_->Verify();
+        mod_union_table_->Update();
+        mod_union_table_->Verify();
+      }
     }
 
     // Roots are marked on the bitmap and the mark_stack is empty.
@@ -1144,30 +1348,14 @@
       timings.AddSplit("RootEnd");
 
       WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      UpdateAndMarkModUnion(timings, gc_type);
       if (gc_type != GC_STICKY) {
-        // Update zygote mod union table.
-        if (gc_type == GC_PARTIAL) {
-          zygote_mod_union_table_->Update();
-          timings.AddSplit("UpdateZygoteModUnionTable");
-
-          zygote_mod_union_table_->MarkReferences();
-          timings.AddSplit("ZygoteMarkReferences");
-        }
-
-        // Processes the cards we cleared earlier and adds their objects into the mod-union table.
-        mod_union_table_->Update();
-        timings.AddSplit("UpdateModUnionTable");
-
-        // Scans all objects in the mod-union table.
-        mod_union_table_->MarkReferences();
-        timings.AddSplit("MarkImageToAllocSpaceReferences");
-
         // Recursively mark all the non-image bits set in the mark bitmap.
         mark_sweep.RecursiveMark(gc_type == GC_PARTIAL, timings);
       } else {
         mark_sweep.RecursiveMarkCards(card_table_.get(), dirty_cards, timings);
-        mark_sweep.DisableFinger();
       }
+      mark_sweep.DisableFinger();
     }
     // Release share on mutator_lock_ and then get exclusive access.
     dirty_begin = NanoTime();
@@ -1186,6 +1374,7 @@
       mark_sweep.RecursiveMarkDirtyObjects(false);
       timings.AddSplit("RecursiveMarkDirtyObjects");
     }
+
     {
       ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
       mark_sweep.ProcessReferences(clear_soft_references);
@@ -1199,18 +1388,9 @@
     // these bitmaps. Doing this enables us to sweep with the heap unlocked since new allocations
     // set the live bit, but since we have the bitmaps reversed at this point, this sets the mark
     // bit instead, resulting in no new allocated objects being incorrectly freed by sweep.
-    bool swap = true;
+    const bool swap = true;
     if (swap) {
-      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
-      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
-        Space* space = *it;
-        // We never allocate into zygote spaces.
-        if (space->GetGcRetentionPolicy() == GCRP_ALWAYS_COLLECT) {
-          live_bitmap_->ReplaceBitmap(space->GetLiveBitmap(), space->GetMarkBitmap());
-          mark_bitmap_->ReplaceBitmap(space->GetMarkBitmap(), space->GetLiveBitmap());
-          space->AsAllocSpace()->SwapBitmaps();
-        }
-      }
+      SwapBitmaps();
     }
 
     if (kIsDebugBuild) {
@@ -1220,6 +1400,22 @@
       timings.AddSplit("VerifyImageRoots");
     }
 
+    if (gc_type == GC_STICKY) {
+      // We only sweep over the live stack, and the live stack should not intersect with the
+      // allocation stack, so it should be safe to UnMark anything in the allocation stack as live.
+      // This only works for sticky Gcs though!
+      UnMarkStackAsLive(allocation_stack_.get());
+    }
+    timings.AddSplit("UnMarkStacks");
+
+    // If we are going to do post Gc verification, let's keep the mutators paused since we don't
+    // want them to touch dead objects before we find these in verification.
+    if (post_gc_verify_heap_) {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      VerifyHeapReferences(std::string("Post ") + gc_type_str.str() + "Gc");
+      timings.AddSplit("VerifyHeapReferencesPostGC");
+    }
+
     thread_list->ResumeAll();
     dirty_end = NanoTime();
     GlobalSynchronization::mutator_lock_->AssertNotHeld();
@@ -1254,7 +1450,7 @@
     const size_t percent_free = GetPercentFree();
     const size_t current_heap_size = GetUsedMemorySize();
     const size_t total_memory = GetTotalMemory();
-    LOG(INFO) << (gc_type == GC_PARTIAL ? "Partial " : (gc_type == GC_STICKY ? "Sticky " : ""))
+    LOG(INFO) << gc_type_str.str()
               << "Concurrent GC freed " << PrettySize(bytes_freed) << ", " << percent_free
               << "% free, " << PrettySize(current_heap_size) << "/"
               << PrettySize(total_memory) << ", " << "paused " << PrettyDuration(pause_roots)
@@ -1298,9 +1494,8 @@
 }
 
 void Heap::DumpForSigQuit(std::ostream& os) {
-  os << "Heap: " << GetPercentFree() << "% free, "
-     << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory())
-     << "; " << num_objects_allocated_ << " objects\n";
+  os << "Heap: " << GetPercentFree() << "% free, " << PrettySize(num_bytes_allocated_) << "/"
+     << PrettySize(GetTotalMemory()) << "; " << num_objects_allocated_ << " objects\n";
 }
 
 size_t Heap::GetPercentFree() {
@@ -1313,8 +1508,8 @@
   // TODO: Behavior for multiple alloc spaces?
   size_t alloc_space_capacity = alloc_space->Capacity();
   if (max_allowed_footprint > alloc_space_capacity) {
-    VLOG(gc) << "Clamp target GC heap from " << PrettySize(max_allowed_footprint)
-             << " to " << PrettySize(alloc_space_capacity);
+    VLOG(gc) << "Clamp target GC heap from " << PrettySize(max_allowed_footprint) << " to "
+             << PrettySize(alloc_space_capacity);
     max_allowed_footprint = alloc_space_capacity;
   }
   alloc_space->SetFootprintLimit(max_allowed_footprint);
@@ -1363,10 +1558,10 @@
 }
 
 void Heap::SetReferenceOffsets(MemberOffset reference_referent_offset,
-    MemberOffset reference_queue_offset,
-    MemberOffset reference_queueNext_offset,
-    MemberOffset reference_pendingNext_offset,
-    MemberOffset finalizer_reference_zombie_offset) {
+                                MemberOffset reference_queue_offset,
+                                MemberOffset reference_queueNext_offset,
+                                MemberOffset reference_pendingNext_offset,
+                                MemberOffset finalizer_reference_zombie_offset) {
   reference_referent_offset_ = reference_referent_offset;
   reference_queue_offset_ = reference_queue_offset;
   reference_queueNext_offset_ = reference_queueNext_offset;
@@ -1441,8 +1636,8 @@
   ScopedObjectAccess soa(self);
   JValue args[1];
   args[0].SetL(object);
-  soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
-                                                                                  NULL, args, NULL);
+  soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self, NULL, args,
+                                                                                   NULL);
 }
 
 size_t Heap::GetBytesAllocated() const {
@@ -1467,18 +1662,16 @@
     ScopedObjectAccess soa(Thread::Current());
     JValue args[1];
     args[0].SetL(*cleared);
-    soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
-                                                                                 NULL, args, NULL);
+    soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(), NULL,
+                                                                                 args, NULL);
     *cleared = NULL;
   }
 }
 
 void Heap::RequestConcurrentGC() {
   // Make sure that we can do a concurrent GC.
-  if (requesting_gc_ ||
-      !Runtime::Current()->IsFinishedStarting() ||
-      Runtime::Current()->IsShuttingDown() ||
-      !Runtime::Current()->IsConcurrentGcEnabled()) {
+  if (requesting_gc_ || !Runtime::Current()->IsFinishedStarting() ||
+      Runtime::Current()->IsShuttingDown() || !Runtime::Current()->IsConcurrentGcEnabled()) {
     return;
   }
 
@@ -1511,9 +1704,9 @@
   }
 }
 
-void Heap::Trim(AllocSpace* alloc_space) {
+void Heap::Trim() {
   WaitForConcurrentGcToComplete();
-  alloc_space->Trim();
+  alloc_space_->Trim();
 }
 
 void Heap::RequestHeapTrim() {
diff --git a/src/heap.h b/src/heap.h
index 6f6cd67..23f2ac3 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -58,6 +58,7 @@
   // Partial GC, over only the alloc space
   GC_PARTIAL,
 };
+std::ostream& operator<<(std::ostream& os, const GcType& policy);
 
 class LOCKABLE Heap {
  public:
@@ -100,6 +101,10 @@
 
   // Check sanity of all live references. Requires the heap lock.
   void VerifyHeap();
+  static void RootMatchesObjectVisitor(const Object* root, void* arg);
+  void VerifyHeapReferences(const std::string& phase)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
@@ -237,7 +242,7 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  void Trim(AllocSpace* alloc_space);
+  void Trim();
 
   HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     return live_bitmap_.get();
@@ -259,6 +264,13 @@
   // Un-mark all the objects in the allocation stack.
   void UnMarkStack(MarkStack* alloc_stack);
 
+  // Un-mark all live objects in the allocation stack.
+  void UnMarkStackAsLive(MarkStack* alloc_stack);
+
+  // Update and mark mod union table based on gc type.
+  void UpdateAndMarkModUnion(TimingLogger& timings, GcType gc_type)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
+
   // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
   // Assumes there is only one image space.
   ImageSpace* GetImageSpace();
@@ -309,6 +321,9 @@
   static void VerificationCallback(Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(GlobalSychronization::heap_bitmap_lock_);
 
+  // Swap bitmaps (if we are a full Gc then we swap the zygote bitmap too).
+  void SwapBitmaps();
+
   Spaces spaces_;
 
   // The alloc space which we are currently allocating into.
@@ -353,6 +368,11 @@
   // Number of objects allocated.  Adjusted after each allocation and free.
   volatile size_t num_objects_allocated_;
 
+  // Heap verification flags.
+  const bool pre_gc_verify_heap_;
+  const bool post_gc_verify_heap_;
+  const bool verify_mod_union_table_;
+
   // Last trim time
   uint64_t last_trim_time_;
 
@@ -397,6 +417,8 @@
 
   bool verify_objects_;
 
+  friend class VerifyReferenceVisitor;
+  friend class VerifyObjectVisitor;
   friend class ScopedHeapLock;
   FRIEND_TEST(SpaceTest, AllocAndFree);
   FRIEND_TEST(SpaceTest, AllocAndFreeList);
diff --git a/src/heap_bitmap.h b/src/heap_bitmap.h
index 98b42b3..d202ae3 100644
--- a/src/heap_bitmap.h
+++ b/src/heap_bitmap.h
@@ -54,9 +54,9 @@
 
     SpaceBitmap* GetSpaceBitmap(const Object* obj) {
       // TODO: C++0x auto
-      for (Bitmaps::iterator cur = bitmaps_.begin(); cur != bitmaps_.end(); ++cur) {
-        if ((*cur)->HasAddress(obj)) {
-          return *cur;
+      for (Bitmaps::iterator it = bitmaps_.begin(); it != bitmaps_.end(); ++it) {
+        if ((*it)->HasAddress(obj)) {
+          return *it;
         }
       }
       return NULL;
@@ -65,8 +65,19 @@
     void Walk(SpaceBitmap::Callback* callback, void* arg)
         SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
       // TODO: C++0x auto
-      for (Bitmaps::iterator cur = bitmaps_.begin(); cur != bitmaps_.end(); ++cur) {
-        (*cur)->Walk(callback, arg);
+      for (Bitmaps::iterator it = bitmaps_.begin(); it != bitmaps_.end(); ++it) {
+        (*it)->Walk(callback, arg);
+      }
+    }
+
+    template <typename Visitor>
+    void Visit(const Visitor& visitor)
+        EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
+      // TODO: C++0x auto
+      for (Bitmaps::iterator it = bitmaps_.begin(); it != bitmaps_.end(); ++it) {
+        SpaceBitmap* bitmap = *it;
+        bitmap->VisitMarkedRange(bitmap->HeapBegin(), bitmap->HeapLimit(), visitor,
+                                 IdentityFunctor());
       }
     }
 
diff --git a/src/mark_stack.h b/src/mark_stack.h
index 224e6d4..7e4cec0 100644
--- a/src/mark_stack.h
+++ b/src/mark_stack.h
@@ -77,6 +77,10 @@
     return const_cast<Object**>(begin_);
   }
 
+  Object** End() {
+    return const_cast<Object**>(ptr_);
+  }
+
   void Reset();
 
  private:
diff --git a/src/mark_sweep.cc b/src/mark_sweep.cc
index c378234..0b09f90 100644
--- a/src/mark_sweep.cc
+++ b/src/mark_sweep.cc
@@ -224,7 +224,6 @@
   CardTable* card_table = heap_->GetCardTable();
   ScanImageRootVisitor image_root_visitor(this);
   SetFingerVisitor finger_visitor(this);
-
   for (Spaces::const_iterator it = spaces.begin(); it != spaces.end(); ++it) {
     Space* space = *it;
     byte* begin = space->Begin();
@@ -358,14 +357,12 @@
 bool MarkSweep::IsMarkedCallback(const Object* object, void* arg) {
   return
       reinterpret_cast<MarkSweep*>(arg)->IsMarked(object) ||
-      //false;
       !reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object);
 }
 
 bool MarkSweep::IsLiveCallback(const Object* object, void* arg) {
   return
       reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object) ||
-      //false;
       !reinterpret_cast<MarkSweep*>(arg)->IsMarked(object);
 }
 
@@ -542,11 +539,11 @@
       scc.space = space->AsAllocSpace();
       SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+      if (swap_bitmaps) {
+        std::swap(live_bitmap, mark_bitmap);
+      }
       if (space->GetGcRetentionPolicy() == GCRP_ALWAYS_COLLECT) {
         // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-        if (swap_bitmaps) {
-          std::swap(live_bitmap, mark_bitmap);
-        }
         SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
                                &SweepCallback, reinterpret_cast<void*>(&scc));
       } else {
diff --git a/src/mark_sweep.h b/src/mark_sweep.h
index db845b7..2333bdb 100644
--- a/src/mark_sweep.h
+++ b/src/mark_sweep.h
@@ -142,6 +142,21 @@
   void SweepSystemWeaks(bool swap_bitmaps)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
+  template <typename Visitor>
+  static void VisitObjectReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
+    DCHECK(obj != NULL);
+    DCHECK(obj->GetClass() != NULL);
+    if (obj->IsClass()) {
+      VisitClassReferences(obj, visitor);
+    } else if (obj->IsArrayInstance()) {
+      VisitArrayReferences(obj, visitor);
+    } else {
+      VisitOtherReferences(obj, visitor);
+    }
+  }
+
  private:
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const Object* object) const
@@ -200,28 +215,13 @@
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
                             GlobalSynchronization::mutator_lock_);
 
-  template <typename Visitor>
-  void VisitObjectReferences(const Object* obj, const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
-                            GlobalSynchronization::mutator_lock_) {
-    DCHECK(obj != NULL);
-    DCHECK(obj->GetClass() != NULL);
-    if (obj->IsClass()) {
-      VisitClassReferences(obj, visitor);
-    } else if (obj->IsArrayInstance()) {
-      VisitArrayReferences(obj, visitor);
-    } else {
-      VisitOtherReferences(obj, visitor);
-    }
-  }
-
   // Grays references in instance fields.
   void ScanInstanceFields(const Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitInstanceFieldsReferences(const Object* obj, const Visitor& visitor)
+  static void VisitInstanceFieldsReferences(const Object* obj, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
@@ -237,7 +237,7 @@
 
 
   template <typename Visitor>
-  void VisitClassReferences(const Object* obj, const Visitor& visitor)
+  static void VisitClassReferences(const Object* obj, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
                             GlobalSynchronization::mutator_lock_) {
     VisitInstanceFieldsReferences(obj, visitor);
@@ -250,9 +250,9 @@
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitStaticFieldsReferences(const Class* klass, const Visitor& visitor)
+  static void VisitStaticFieldsReferences(const Class* klass, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
-                            GlobalSynchronization::mutator_lock_) {\
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(klass != NULL);
     VisitFieldsReferences(klass, klass->GetReferenceStaticOffsets(), true, visitor);
   }
@@ -263,7 +263,7 @@
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitFieldsReferences(const Object* obj, uint32_t ref_offsets, bool is_static,
+  static void VisitFieldsReferences(const Object* obj, uint32_t ref_offsets, bool is_static,
                              const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
                             GlobalSynchronization::mutator_lock_) {
@@ -305,7 +305,7 @@
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitArrayReferences(const Object* obj, const Visitor& visitor)
+  static void VisitArrayReferences(const Object* obj, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
                             GlobalSynchronization::mutator_lock_) {
     visitor(obj, obj->GetClass(), Object::ClassOffset(), false);
@@ -324,7 +324,7 @@
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitOtherReferences(const Object* obj, const Visitor& visitor)
+  static void VisitOtherReferences(const Object* obj, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
                             GlobalSynchronization::mutator_lock_) {
     return VisitInstanceFieldsReferences(obj, visitor);
diff --git a/src/mod_union_table.cc b/src/mod_union_table.cc
index 51fe70a..eb8c598 100644
--- a/src/mod_union_table.cc
+++ b/src/mod_union_table.cc
@@ -252,7 +252,10 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator ()(const Object* obj, const Object* ref, const MemberOffset& /* offset */, bool /* is_static */) const {
+  void operator ()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
+                     bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     Heap* heap = mod_union_table_->GetMarkSweep()->GetHeap();
     if (mod_union_table_->AddReference(obj, ref) && references_.find(ref) == references_.end()) {
       Space* from_space = heap->FindSpaceFromObject(obj);
@@ -283,7 +286,9 @@
       references_(references) {
   }
 
-  void operator ()(Object* obj, void* /* finger */) const {
+  void operator ()(Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     MarkSweep* mark_sweep = mod_union_table_->GetMarkSweep();
     CheckReferenceVisitor visitor(mod_union_table_, references_);
@@ -296,7 +301,6 @@
 };
 
 void ModUnionTableReferenceCache::Verify() {
-#if VERIFY_MOD_UNION
   // Start by checking that everything in the mod union table is marked.
   Heap* heap = GetMarkSweep()->GetHeap();
   for (ReferenceMap::const_iterator it = references_.begin(); it != references_.end(); ++it) {
@@ -319,10 +323,9 @@
       uintptr_t end = start + GC_CARD_SIZE;
       SpaceBitmap* live_bitmap =
               heap->FindSpaceFromObject(reinterpret_cast<Object*>(start))->GetLiveBitmap();
-      live_bitmap->VisitMarkedRange(start, end, visitor);
+      live_bitmap->VisitMarkedRange(start, end, visitor, IdentityFunctor());
     }
   }
-#endif
 }
 
 void ModUnionTableReferenceCache::Update() {
diff --git a/src/mod_union_table.h b/src/mod_union_table.h
index 63a46f0..df2023f 100644
--- a/src/mod_union_table.h
+++ b/src/mod_union_table.h
@@ -21,8 +21,6 @@
 #include "safe_map.h"
 #include "space.h"
 
-#define VERIFY_MOD_UNION 0
-
 namespace art {
 
 class Heap;
@@ -53,8 +51,10 @@
   virtual void MarkReferences() = 0;
 
   // Verification, sanity checks that we don't have clean cards which conflict with out cached data
-  // for said cards.
-  virtual void Verify() = 0;
+  // for said cards. Exclusive lock is required since verify sometimes uses
+  // SpaceBitmap::VisitMarkedRange and VisitMarkedRange can't know if the callback will modify the
+  // bitmap or not.
+  virtual void Verify() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) = 0;
 
   // Should probably clean this up later.
   void Init(MarkSweep* mark_sweep) {
@@ -121,8 +121,9 @@
   // Mark all references to the alloc space(s).
   void MarkReferences() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  // Verify the mod-union table.
-  void Verify();
+  // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
+  // VisitMarkedRange can't know if the callback will modify the bitmap or not.
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Function that tells whether or not to add a reference to the table.
   virtual bool AddReference(const Object* obj, const Object* ref) = 0;
diff --git a/src/native/dalvik_system_VMRuntime.cc b/src/native/dalvik_system_VMRuntime.cc
index 9e4ecd1..ed61de9 100644
--- a/src/native/dalvik_system_VMRuntime.cc
+++ b/src/native/dalvik_system_VMRuntime.cc
@@ -157,23 +157,17 @@
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
   // Trim the managed heap.
   Heap* heap = Runtime::Current()->GetHeap();
-  const Spaces& spaces = heap->GetSpaces();
-  // TODO: C++0x auto
-  for (Spaces::const_iterator cur = spaces.begin(); cur != spaces.end(); ++cur) {
-    if ((*cur)->IsAllocSpace()) {
-      uint64_t start_ns = NanoTime();
-      AllocSpace* alloc_space = (*cur)->AsAllocSpace();
-      size_t alloc_space_size = alloc_space->Size();
-      float utilization = static_cast<float>(heap->GetBytesAllocated()) / alloc_space_size;
-      heap->Trim(alloc_space);
-      // Trim the native heap.
-      dlmalloc_trim(0);
-      dlmalloc_inspect_all(MspaceMadviseCallback, NULL);
-      LOG(INFO) << "Parallel heap trimming took " << PrettyDuration(NanoTime() - start_ns)
-                << " on a " << PrettySize(alloc_space_size)
-                << " alloc space with " << static_cast<int>(100 * utilization) << "% utilization";
-    }
-  }
+  uint64_t start_ns = NanoTime();
+  AllocSpace* alloc_space = heap->GetAllocSpace();
+  size_t alloc_space_size = alloc_space->Size();
+  float utilization = static_cast<float>(heap->GetBytesAllocated()) / alloc_space_size;
+  heap->Trim();
+  // Trim the native heap.
+  dlmalloc_trim(0);
+  dlmalloc_inspect_all(MspaceMadviseCallback, NULL);
+  LOG(INFO) << "Parallel heap trimming took " << PrettyDuration(NanoTime() - start_ns)
+            << " on a " << PrettySize(alloc_space_size)
+            << " alloc space with " << static_cast<int>(100 * utilization) << "% utilization";
 }
 
 static void VMRuntime_concurrentGC(JNIEnv*, jobject) {
diff --git a/src/object.h b/src/object.h
index 6d686f4..c20c99a 100644
--- a/src/object.h
+++ b/src/object.h
@@ -2011,7 +2011,10 @@
 
 inline void Object::SetClass(Class* new_klass) {
   // new_klass may be NULL prior to class linker initialization
-  SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
+  // We don't mark the card since the class is guaranteed to be referenced from another location.
+  // Proxy classes are held live by the class loader, and other classes are roots of the class
+  // linker.
+  SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
 inline bool Object::InstanceOf(const Class* klass) const {
diff --git a/src/space.cc b/src/space.cc
index 34e7648..bdac650 100644
--- a/src/space.cc
+++ b/src/space.cc
@@ -268,6 +268,8 @@
     if (!Contains(ptrs[i])) {
       num_broken_ptrs++;
       LOG(ERROR) << "FreeList[" << i << "] (" << ptrs[i] << ") not in bounds of heap " << *this;
+      size_t size = mspace_usable_size(ptrs[i]);
+      memset(ptrs[i], 0xEF, size);
     }
   }
   CHECK_EQ(num_broken_ptrs, 0u);