Use the card table to scan the immune region of the heap.

Using the card table results in a dramatic cost reduction in scanning
time.  During a reboot, scanning each object in the zygote for
pointers to the application heap costs an average of 20ms of thread
time on a stingray with 8 megabyte zygote.  In comparison, scanning
dirty cards in the zygote costs 300us on average.

Change-Id: I1dba35646d509e6b1b4535e291a1eb6f66d7b218
diff --git a/vm/alloc/CardTable.c b/vm/alloc/CardTable.c
index a745c43..bbbb4ad 100644
--- a/vm/alloc/CardTable.c
+++ b/vm/alloc/CardTable.c
@@ -44,11 +44,7 @@
  * byte is equal to GC_DIRTY_CARD. See dvmCardTableStartup for details.
  */
 
-/*
- * Initializes the card table; must be called before any other
- * dvmCardTable*() functions.
- */
-bool dvmCardTableStartup(size_t heapMaximumSize)
+static bool allocCardTable(size_t heapMaximumSize)
 {
     size_t length;
     void *allocBase;
@@ -60,6 +56,7 @@
 
     /* Set up the card table */
     length = heapMaximumSize / GC_CARD_SIZE;
+    assert(length * GC_CARD_SIZE == heapMaximumSize);
     /* Allocate an extra 256 bytes to allow fixed low-byte of base */
     allocBase = dvmAllocRegion(length + 0x100, PROT_READ | PROT_WRITE,
                             "dalvik-card-table");
@@ -83,19 +80,109 @@
     return true;
 }
 
+static bool allocModUnionTable(size_t heapMaximumSize)
+{
+    size_t length = heapMaximumSize / GC_CARD_SIZE / HB_BITS_PER_WORD;
+    assert(length * GC_CARD_SIZE * HB_BITS_PER_WORD == heapMaximumSize);
+    int prot = PROT_READ | PROT_WRITE;
+    void *allocBase = dvmAllocRegion(length, prot, "dalvik-modunion-table");
+    if (allocBase == NULL) {
+        return false;
+    }
+    GcHeap *gcHeap = gDvm.gcHeap;
+    gcHeap->modUnionTableBase = (u1*)allocBase;
+    gcHeap->modUnionTableLength = length;
+    return true;
+}
+
+/*
+ * Initializes the card table; must be called before any other
+ * dvmCardTable*() functions.
+ */
+bool dvmCardTableStartup(size_t heapMaximumSize)
+{
+    return allocCardTable(heapMaximumSize) && allocModUnionTable(heapMaximumSize);
+}
+
+/*
+ * Releases storage for the card table and clears its globals.
+ */
+static void freeCardTable()
+{
+    if (gDvm.biasedCardTableBase == NULL) {
+        return;
+    }
+    gDvm.biasedCardTableBase = NULL;
+    munmap(gDvm.gcHeap->cardTableBase, gDvm.gcHeap->cardTableLength);
+    gDvm.gcHeap->cardTableBase = NULL;
+    gDvm.gcHeap->cardTableLength = 0;
+}
+
+/*
+ * Releases storage for the mod union table and clears its globals.
+ */
+static void freeModUnionTable()
+{
+    if (gDvm.gcHeap->modUnionTableBase == NULL) {
+        return;
+    }
+    munmap(gDvm.gcHeap->modUnionTableBase, gDvm.gcHeap->modUnionTableLength);
+    gDvm.gcHeap->modUnionTableBase = NULL;
+    gDvm.gcHeap->modUnionTableLength = 0;
+}
+
 /*
  * Tears down the entire CardTable.
  */
 void dvmCardTableShutdown()
 {
-    gDvm.biasedCardTableBase = NULL;
-    munmap(gDvm.gcHeap->cardTableBase, gDvm.gcHeap->cardTableLength);
+    freeCardTable();
+    freeModUnionTable();
+}
+
+/*
+ * Set a bit in the mod union table for each dirty byte in the card
+ * table.  Clears the corresponding byte in the card table.
+ */
+static void moveCardsToModUnion(u1 *base, u1 *limit)
+{
+    GcHeap *h = gDvm.gcHeap;
+    u1 *baseCard = dvmCardFromAddr(base);
+    u1 *limitCard = dvmCardFromAddr(limit);
+    u4 *bits = (u4*)h->modUnionTableBase;
+    u1 *heapBase = (u1*)dvmHeapSourceGetBase();
+    u1 *card;
+    for (card = baseCard; card < limitCard; ++card) {
+        if (*card == GC_CARD_CLEAN) {
+            continue;
+        }
+        u1 *addr = (u1*)dvmAddrFromCard(card);
+        u1 *biased = (u1*)((uintptr_t)addr - (uintptr_t)heapBase);
+        size_t offset = (uintptr_t)biased / GC_CARD_SIZE / HB_BITS_PER_WORD;
+        u4 bit = 1 << (((uintptr_t)biased / GC_CARD_SIZE) % HB_BITS_PER_WORD);
+        assert((u1*)&bits[offset] >= h->modUnionTableBase);
+        assert((u1*)&bits[offset] < h->modUnionTableBase+h->modUnionTableLength);
+        bits[offset] |= bit;
+        *card = GC_CARD_CLEAN;
+    }
 }
 
 void dvmClearCardTable(void)
 {
-    assert(gDvm.gcHeap->cardTableBase != NULL);
-    memset(gDvm.gcHeap->cardTableBase, GC_CARD_CLEAN, gDvm.gcHeap->cardTableLength);
+    uintptr_t base[HEAP_SOURCE_MAX_HEAP_COUNT];
+    uintptr_t limit[HEAP_SOURCE_MAX_HEAP_COUNT];
+    size_t numHeaps = dvmHeapSourceGetNumHeaps();
+    dvmHeapSourceGetRegions(base, NULL, limit, numHeaps);
+    size_t i;
+    for (i = 0; i < numHeaps; ++i) {
+        if (i != 0) {
+            moveCardsToModUnion((u1*)base[i], (u1*)limit[i]);
+        } else {
+            u1 *baseCard = dvmCardFromAddr((u1*)base[i]);
+            u1 *limitCard = dvmCardFromAddr((u1*)limit[i]);
+            memset(baseCard, GC_CARD_CLEAN, limitCard - baseCard);
+        }
+    }
 }
 
 /*