Sweep concurrently.

After marking, exchange the mark and live bitmaps and resume all
threads.  The sweep proceeds concurrently viewing the new live bitmap
as the old mark bitmap thereby permitting allocations performed while
sweeping to update the live bitmap.

Change-Id: I9c307190a14ce417413175db016be41c38aeeaf3
diff --git a/vm/Sync.c b/vm/Sync.c
index 2223c48..8603455 100644
--- a/vm/Sync.c
+++ b/vm/Sync.c
@@ -333,6 +333,9 @@
 
     assert(mon != NULL);
     assert(isUnmarkedObject != NULL);
+#ifdef WITH_DEADLOCK_PREDICTION
+    dvmDumpMonitorInfo("before monitor sweep");
+#endif
     prev = &handle;
     prev->next = curr = *mon;
     while (curr != NULL) {
@@ -346,6 +349,9 @@
         }
     }
     *mon = handle.next;
+#ifdef WITH_DEADLOCK_PREDICTION
+    dvmDumpMonitorInfo("after monitor sweep");
+#endif
 }
 
 static char *logWriteInt(char *dst, int value)
diff --git a/vm/alloc/Heap.c b/vm/alloc/Heap.c
index 8f5189b..f35bf43 100644
--- a/vm/alloc/Heap.c
+++ b/vm/alloc/Heap.c
@@ -594,11 +594,10 @@
 void dvmCollectGarbageInternal(bool clearSoftRefs, GcReason reason)
 {
     GcHeap *gcHeap = gDvm.gcHeap;
-    u4 suspendStart, totalTime;
-    u4 rootStart, rootEnd, rootTime, rootSuspendTime;
-    u4 dirtyStart, dirtyEnd, dirtyTime, dirtySuspendTime;
-    int numFreed;
-    size_t sizeFreed;
+    u4 rootSuspend, rootSuspendTime, rootStart, rootEnd;
+    u4 dirtySuspend, dirtyStart, dirtyEnd;
+    u4 totalTime;
+    size_t numObjects, numBytes;
     GcMode gcMode;
     int oldThreadPriority = kInvalidPriority;
 
@@ -613,9 +612,10 @@
     gcMode = (reason == GC_FOR_MALLOC) ? GC_PARTIAL : GC_FULL;
     gcHeap->gcRunning = true;
 
-    suspendStart = dvmGetRelativeTimeMsec();
+    rootSuspend = dvmGetRelativeTimeMsec();
     dvmSuspendAllThreads(SUSPEND_FOR_GC);
     rootStart = dvmGetRelativeTimeMsec();
+    rootSuspendTime = rootStart - rootSuspend;
 
     /*
      * If we are not marking concurrently raise the priority of the
@@ -748,8 +748,6 @@
         dvmClearCardTable();
         dvmUnlockHeap();
         dvmResumeAllThreads(SUSPEND_FOR_GC);
-        rootSuspendTime = rootStart - suspendStart;
-        rootTime = rootEnd - rootStart;
     }
 
     /* Recursively mark any objects that marked objects point to strongly.
@@ -765,7 +763,7 @@
          * suspension.
          */
         dvmLockHeap();
-        suspendStart = dvmGetRelativeTimeMsec();
+        dirtySuspend = dvmGetRelativeTimeMsec();
         dvmSuspendAllThreads(SUSPEND_FOR_GC);
         dirtyStart = dvmGetRelativeTimeMsec();
         /*
@@ -817,17 +815,46 @@
     LOGD_HEAP("Handling phantom references...");
     dvmClearWhiteRefs(&gcHeap->phantomReferences);
 
-#ifdef WITH_DEADLOCK_PREDICTION
-    dvmDumpMonitorInfo("before sweep");
-#endif
-    LOGD_HEAP("Sweeping...");
-    dvmHeapSweepUnmarkedObjects(gcMode, &numFreed, &sizeFreed);
-#ifdef WITH_DEADLOCK_PREDICTION
-    dvmDumpMonitorInfo("after sweep");
+#if defined(WITH_JIT)
+    /*
+     * Patching a chaining cell is very cheap as it only updates 4 words. It's
+     * the overhead of stopping all threads and synchronizing the I/D cache
+     * that makes it expensive.
+     *
+     * Therefore we batch those work orders in a queue and go through them
+     * when threads are suspended for GC.
+     */
+    dvmCompilerPerformSafePointChecks();
 #endif
 
+    LOGD_HEAP("Sweeping...");
+
+    dvmHeapSweepSystemWeaks();
+
+    /*
+     * Live objects have a bit set in the mark bitmap, swap the mark
+     * and live bitmaps.  The sweep can proceed concurrently viewing
+     * the new live bitmap as the old mark bitmap, and vice versa.
+     */
+    dvmHeapSourceSwapBitmaps();
+
+    if (gDvm.postVerify) {
+        LOGV_HEAP("Verifying roots and heap after GC");
+        verifyRootsAndHeap();
+    }
+
+    if (reason == GC_CONCURRENT) {
+        dirtyEnd = dvmGetRelativeTimeMsec();
+        dvmUnlockHeap();
+        dvmResumeAllThreads(SUSPEND_FOR_GC);
+    }
+    dvmHeapSweepUnmarkedObjects(gcMode, reason == GC_CONCURRENT,
+                                &numObjects, &numBytes);
     LOGD_HEAP("Cleaning up...");
     dvmHeapFinishMarkStep();
+    if (reason == GC_CONCURRENT) {
+        dvmLockHeap();
+    }
 
     LOGD_HEAP("Done.");
 
@@ -865,38 +892,12 @@
 #endif
     LOGV_HEAP("GC finished");
 
-    if (gDvm.postVerify) {
-        LOGV_HEAP("Verifying roots and heap after GC");
-        verifyRootsAndHeap();
-    }
-
     gcHeap->gcRunning = false;
 
     LOGV_HEAP("Resuming threads");
     dvmUnlockMutex(&gDvm.heapWorkerListLock);
     dvmUnlockMutex(&gDvm.heapWorkerLock);
 
-#if defined(WITH_JIT)
-    /*
-     * Patching a chaining cell is very cheap as it only updates 4 words. It's
-     * the overhead of stopping all threads and synchronizing the I/D cache
-     * that makes it expensive.
-     *
-     * Therefore we batch those work orders in a queue and go through them
-     * when threads are suspended for GC.
-     */
-    dvmCompilerPerformSafePointChecks();
-#endif
-
-    dirtyEnd = dvmGetRelativeTimeMsec();
-
-    if (reason == GC_CONCURRENT) {
-        dirtySuspendTime = dirtyStart - suspendStart;
-        dirtyTime = dirtyEnd - dirtyStart;
-    }
-
-    dvmResumeAllThreads(SUSPEND_FOR_GC);
-
     if (reason == GC_CONCURRENT) {
         /*
          * Wake-up any threads that blocked after a failed allocation
@@ -906,6 +907,8 @@
     }
 
     if (reason != GC_CONCURRENT) {
+        dirtyEnd = dvmGetRelativeTimeMsec();
+        dvmResumeAllThreads(SUSPEND_FOR_GC);
         if (oldThreadPriority != kInvalidPriority) {
             if (setpriority(PRIO_PROCESS, 0, oldThreadPriority) != 0) {
                 LOGW_HEAP("Unable to reset priority to %d: %s\n",
@@ -921,20 +924,22 @@
     }
 
     if (reason != GC_CONCURRENT) {
-        u4 suspendTime = rootStart - suspendStart;
         u4 markSweepTime = dirtyEnd - rootStart;
-        totalTime = suspendTime + markSweepTime;
+        totalTime = rootSuspendTime + markSweepTime;
         LOGD("%s freed %d objects / %zd bytes in (%ums) %ums",
-             GcReasonStr[reason], numFreed, sizeFreed,
-             suspendTime, markSweepTime);
+             GcReasonStr[reason], numObjects, numBytes,
+             rootSuspendTime, markSweepTime);
     } else {
+        u4 rootTime = rootEnd - rootStart;
+        u4 dirtySuspendTime = dirtyStart - dirtySuspend;
+        u4 dirtyTime = dirtyEnd - dirtyStart;
         totalTime = rootSuspendTime + rootTime + dirtySuspendTime + dirtyTime;
         LOGD("%s freed %d objects / %zd bytes in (%ums) %ums (%ums) %ums",
-             GcReasonStr[reason], numFreed, sizeFreed,
+             GcReasonStr[reason], numObjects, numBytes,
              rootSuspendTime, rootTime,
              dirtySuspendTime, dirtyTime);
     }
-    dvmLogGcStats(numFreed, sizeFreed, totalTime);
+    dvmLogGcStats(numObjects, numBytes, totalTime);
     if (gcHeap->ddmHpifWhen != 0) {
         LOGD_HEAP("Sending VM heap info to DDM\n");
         dvmDdmSendHeapInfo(gcHeap->ddmHpifWhen, false);
diff --git a/vm/alloc/HeapSource.c b/vm/alloc/HeapSource.c
index fd5d79f..8eac5cd 100644
--- a/vm/alloc/HeapSource.c
+++ b/vm/alloc/HeapSource.c
@@ -301,7 +301,7 @@
     assert(heap->bytesAllocated < mspace_footprint(heap->msp));
 }
 
-static void countFree(Heap *heap, const void *ptr, bool isObj)
+static void countFree(Heap *heap, const void *ptr, size_t *numBytes)
 {
     HeapSource *hs;
     size_t delta;
@@ -314,13 +314,12 @@
     } else {
         heap->bytesAllocated = 0;
     }
-    if (isObj) {
-        hs = gDvm.gcHeap->heapSource;
-        dvmHeapBitmapClearObjectBit(&hs->liveBits, ptr);
-        if (heap->objectsAllocated > 0) {
-            heap->objectsAllocated--;
-        }
+    hs = gDvm.gcHeap->heapSource;
+    dvmHeapBitmapClearObjectBit(&hs->liveBits, ptr);
+    if (heap->objectsAllocated > 0) {
+        heap->objectsAllocated--;
     }
+    *numBytes += delta;
 }
 
 static HeapSource *gHs = NULL;
@@ -718,8 +717,6 @@
     assert(numHeaps == hs->numHeaps);
     for (i = 0; i < hs->numHeaps; ++i) {
         base = (uintptr_t)hs->heaps[i].base;
-        /* Using liveBits.max will include all the markBits as well. */
-        assert(hs->liveBits.max >= hs->markBits.max);
         /* -1 because limit is exclusive but max is inclusive. */
         max = MIN((uintptr_t)hs->heaps[i].limit - 1, hs->liveBits.max);
         aliasBitmap(&liveBits[i], &hs->liveBits, base, max);
@@ -928,47 +925,26 @@
 }
 
 /*
- * Frees the memory pointed to by <ptr>, which may be NULL.
+ * Frees the first numPtrs objects in the ptrs list and returns the
+ * amount of reclaimed storage. The list must contain addresses all in
+ * the same mspace, and must be in increasing order. This implies that
+ * there are no duplicates, and no entries are NULL.
  */
-void
-dvmHeapSourceFree(void *ptr)
+size_t dvmHeapSourceFreeList(size_t numPtrs, void **ptrs)
 {
     Heap *heap;
-
-    HS_BOILERPLATE();
-
-    heap = ptr2heap(gHs, ptr);
-    if (heap != NULL) {
-        countFree(heap, ptr, true);
-        /* Only free objects that are in the active heap.
-         * Touching old heaps would pull pages into this process.
-         */
-        if (heap == gHs->heaps) {
-            mspace_free(heap->msp, ptr);
-        }
-    }
-}
-
-/*
- * Frees the first numPtrs objects in the ptrs list. The list must
- * contain addresses all in the same mspace, and must be in increasing
- * order. This implies that there are no duplicates, and no entries
- * are NULL.
- */
-void
-dvmHeapSourceFreeList(size_t numPtrs, void **ptrs)
-{
-    Heap *heap;
+    size_t numBytes;
 
     HS_BOILERPLATE();
 
     if (numPtrs == 0) {
-        return;
+        return 0;
     }
 
     assert(ptrs != NULL);
     assert(*ptrs != NULL);
     heap = ptr2heap(gHs, *ptrs);
+    numBytes = 0;
     if (heap != NULL) {
         mspace *msp = heap->msp;
         // Calling mspace_free on shared heaps disrupts sharing too
@@ -992,7 +968,7 @@
             // countFree ptrs[0] and initializing merged.
             assert(ptrs[0] != NULL);
             assert(ptr2heap(gHs, ptrs[0]) == heap);
-            countFree(heap, ptrs[0], true);
+            countFree(heap, ptrs[0], &numBytes);
             void *merged = ptrs[0];
 
             size_t i;
@@ -1001,7 +977,7 @@
                 assert(ptrs[i] != NULL);
                 assert((intptr_t)merged < (intptr_t)ptrs[i]);
                 assert(ptr2heap(gHs, ptrs[i]) == heap);
-                countFree(heap, ptrs[i], true);
+                countFree(heap, ptrs[i], &numBytes);
                 // Try to merge. If it works, merged now includes the
                 // memory of ptrs[i]. If it doesn't, free merged, and
                 // see if ptrs[i] starts a new run of adjacent
@@ -1019,10 +995,11 @@
             for (i = 0; i < numPtrs; i++) {
                 assert(ptrs[i] != NULL);
                 assert(ptr2heap(gHs, ptrs[i]) == heap);
-                countFree(heap, ptrs[i], true);
+                countFree(heap, ptrs[i], &numBytes);
             }
         }
     }
+    return numBytes;
 }
 
 /*
diff --git a/vm/alloc/HeapSource.h b/vm/alloc/HeapSource.h
index 46f5cc1..ab00889 100644
--- a/vm/alloc/HeapSource.h
+++ b/vm/alloc/HeapSource.h
@@ -105,17 +105,12 @@
 void *dvmHeapSourceAllocAndGrow(size_t n);
 
 /*
- * Frees the memory pointed to by <ptr>, which may be NULL.
+ * Frees the first numPtrs objects in the ptrs list and returns the
+ * amount of reclaimed storage.  The list must contain addresses all
+ * in the same mspace, and must be in increasing order. This implies
+ * that there are no duplicates, and no entries are NULL.
  */
-void dvmHeapSourceFree(void *ptr);
-
-/*
- * Frees the first numPtrs objects in the ptrs list. The list must
- * contain addresses all in the same mspace, and must be in increasing
- * order. This implies that there are no duplicates, and no entries
- * are NULL.
- */
-void dvmHeapSourceFreeList(size_t numPtrs, void **ptrs);
+size_t dvmHeapSourceFreeList(size_t numPtrs, void **ptrs);
 
 /*
  * Returns true iff <ptr> was allocated from the heap source.
diff --git a/vm/alloc/MarkSweep.c b/vm/alloc/MarkSweep.c
index d35f3dc..f673907 100644
--- a/vm/alloc/MarkSweep.c
+++ b/vm/alloc/MarkSweep.c
@@ -943,13 +943,6 @@
 
     markContext = &gDvm.gcHeap->markContext;
 
-    /* The sweep step freed every object that appeared in the
-     * HeapSource bitmaps that didn't appear in the mark bitmaps.
-     * The new state of the HeapSource is exactly the final
-     * mark bitmaps, so swap them in.
-     */
-    dvmHeapSourceSwapBitmaps();
-
     /* The mark bits are now not needed.
      */
     dvmHeapSourceZeroMarkBitmap();
@@ -961,10 +954,17 @@
     markContext->finger = NULL;
 }
 
+typedef struct {
+    size_t numObjects;
+    size_t numBytes;
+    bool isConcurrent;
+} SweepContext;
+
 static void sweepBitmapCallback(size_t numPtrs, void **ptrs,
                                 const void *finger, void *arg)
 {
     const ClassObject *const classJavaLangClass = gDvm.classJavaLangClass;
+    SweepContext *ctx = arg;
     size_t i;
 
     for (i = 0; i < numPtrs; i++) {
@@ -987,11 +987,19 @@
     }
     // TODO: dvmHeapSourceFreeList has a loop, just like the above
     // does. Consider collapsing the two loops to save overhead.
-    dvmHeapSourceFreeList(numPtrs, ptrs);
+    if (ctx->isConcurrent) {
+        dvmLockHeap();
+    }
+    ctx->numBytes += dvmHeapSourceFreeList(numPtrs, ptrs);
+    ctx->numObjects += numPtrs;
+    if (ctx->isConcurrent) {
+        dvmUnlockHeap();
+    }
 }
 
-/* Returns true if the given object is unmarked.  Ignores the low bits
- * of the pointer because the intern table may set them.
+/*
+ * Returns true if the given object is unmarked.  This assumes that
+ * the bitmaps have not yet been swapped.
  */
 static int isUnmarkedObject(void *object)
 {
@@ -999,54 +1007,49 @@
             &gDvm.gcHeap->markContext);
 }
 
-/* Walk through the list of objects that haven't been
- * marked and free them.
+/*
+ * Process all the internal system structures that behave like
+ * weakly-held objects.
  */
-void
-dvmHeapSweepUnmarkedObjects(GcMode mode, int *numFreed, size_t *sizeFreed)
+void dvmHeapSweepSystemWeaks(void)
+{
+    dvmGcDetachDeadInternedStrings(isUnmarkedObject);
+    dvmSweepMonitorList(&gDvm.monitorList, isUnmarkedObject);
+}
+
+/*
+ * Walk through the list of objects that haven't been marked and free
+ * them.  Assumes the bitmaps have been swapped.
+ */
+void dvmHeapSweepUnmarkedObjects(GcMode mode, bool isConcurrent,
+                                 size_t *numObjects, size_t *numBytes)
 {
     HeapBitmap markBits[HEAP_SOURCE_MAX_HEAP_COUNT];
     HeapBitmap liveBits[HEAP_SOURCE_MAX_HEAP_COUNT];
-    size_t origObjectsAllocated;
-    size_t origBytesAllocated;
+    SweepContext ctx;
     size_t numBitmaps, numSweepBitmaps;
     size_t i;
 
-    /* All reachable objects have been marked.
-     * Detach any unreachable interned strings before
-     * we sweep.
-     */
-    dvmGcDetachDeadInternedStrings(isUnmarkedObject);
-
-    /* Free any known objects that are not marked.
-     */
-    origObjectsAllocated = dvmHeapSourceGetValue(HS_OBJECTS_ALLOCATED, NULL, 0);
-    origBytesAllocated = dvmHeapSourceGetValue(HS_BYTES_ALLOCATED, NULL, 0);
-
-    dvmSweepMonitorList(&gDvm.monitorList, isUnmarkedObject);
-
     numBitmaps = dvmHeapSourceGetNumHeaps();
-    dvmHeapSourceGetObjectBitmaps(liveBits, markBits, numBitmaps);
+    dvmHeapSourceGetObjectBitmaps(markBits, liveBits, numBitmaps);
     if (mode == GC_PARTIAL) {
         numSweepBitmaps = 1;
         assert((uintptr_t)gDvm.gcHeap->markContext.immuneLimit == liveBits[0].base);
     } else {
         numSweepBitmaps = numBitmaps;
     }
+    ctx.numObjects = ctx.numBytes = 0;
+    ctx.isConcurrent = isConcurrent;
     for (i = 0; i < numSweepBitmaps; i++) {
         dvmHeapBitmapSweepWalk(&liveBits[i], &markBits[i],
-                               sweepBitmapCallback, NULL);
+                               sweepBitmapCallback, &ctx);
     }
-
-    *numFreed = origObjectsAllocated -
-            dvmHeapSourceGetValue(HS_OBJECTS_ALLOCATED, NULL, 0);
-    *sizeFreed = origBytesAllocated -
-            dvmHeapSourceGetValue(HS_BYTES_ALLOCATED, NULL, 0);
-
+    *numObjects = ctx.numObjects;
+    *numBytes = ctx.numBytes;
 #ifdef WITH_PROFILER
     if (gDvm.allocProf.enabled) {
-        gDvm.allocProf.freeCount += *numFreed;
-        gDvm.allocProf.freeSize += *sizeFreed;
+        gDvm.allocProf.freeCount += ctx.numObjects;
+        gDvm.allocProf.freeSize += ctx.numBytes;
     }
 #endif
 }
diff --git a/vm/alloc/MarkSweep.h b/vm/alloc/MarkSweep.h
index 0fc5933..ffdfbd5 100644
--- a/vm/alloc/MarkSweep.h
+++ b/vm/alloc/MarkSweep.h
@@ -53,7 +53,8 @@
 void dvmClearWhiteRefs(Object **list);
 void dvmHeapScheduleFinalizations(void);
 void dvmHeapFinishMarkStep(void);
-
-void dvmHeapSweepUnmarkedObjects(GcMode mode, int *numFreed, size_t *sizeFreed);
+void dvmHeapSweepSystemWeaks(void);
+void dvmHeapSweepUnmarkedObjects(GcMode mode, bool isConcurrent,
+                                 size_t *numObjects, size_t *numBytes);
 
 #endif  // _DALVIK_ALLOC_MARK_SWEEP