Parallel mark stack processing
Enabled parallel mark stack processing by using a thread pool.
Optimized object scanning by removing dependent loads for IsClass.
Performance:
Prime: ~10% speedup of partial GC.
Nakasi: ~50% speedup of partial GC.
Change-Id: I43256a068efc47cb52d93108458ea18d4e02fccc
diff --git a/src/heap.h b/src/heap.h
index 8ed5881..6c4c38b 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -31,6 +31,7 @@
#include "offsets.h"
#include "safe_map.h"
#include "timing_logger.h"
+#include "thread_pool.h"
#define VERIFY_OBJECT_ENABLED 0
@@ -312,6 +313,13 @@
// GC performance measuring
void DumpGcPerformanceInfo();
+ // Thread pool for parallel GC work; GetThreadPool() returns a non-owning pointer.
+ void CreateThreadPool();
+ void DeleteThreadPool();
+ ThreadPool* GetThreadPool() {
+ return thread_pool_.get();
+ }
+
private:
// Allocates uninitialized storage. Passing in a null space tries to place the object in the
// large object space.
@@ -408,6 +416,9 @@
Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
+ // Reference queue lock
+ UniquePtr<Mutex> reference_queue_lock_;
+
// True while the garbage collector is running.
volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
@@ -450,6 +461,9 @@
const bool verify_post_gc_heap_;
const bool verify_mod_union_table_;
+ // Parallel GC data structures.
+ UniquePtr<ThreadPool> thread_pool_;
+
// After how many GCs we force to do a partial GC instead of sticky mark bits GC.
const size_t partial_gc_frequency_;