Parallel mark stack processing

Enabled parallel mark stack processing by using a thread pool.

Optimized object scanning by removing dependent loads for IsClass.

Performance:
Prime: ~10% speedup of partial GC.
Nakasi: ~50% speedup of partial GC.

Change-Id: I43256a068efc47cb52d93108458ea18d4e02fccc
diff --git a/src/object.cc b/src/object.cc
index 9a4588a..cebbb2a 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -733,6 +733,19 @@
   RegisterNative(self, Runtime::Current()->GetJniDlsymLookupStub()->GetData());
 }
 
+Class* Class::java_lang_Class_ = NULL;  // NULL until SetClassClass() stores a value.
+
+void Class::SetClassClass(Class* java_lang_Class) {  // Stores the pointer in java_lang_Class_.
+  CHECK(java_lang_Class_ == NULL) << java_lang_Class_ << " " << java_lang_Class;  // Must not already be set.
+  CHECK(java_lang_Class != NULL);  // The argument must be non-NULL.
+  java_lang_Class_ = java_lang_Class;
+}
+
+void Class::ResetClass() {  // Clears java_lang_Class_ back to NULL.
+  CHECK(java_lang_Class_ != NULL);  // Must currently be set.
+  java_lang_Class_ = NULL;
+}
+
 void Class::SetStatus(Status new_status) {
   CHECK(new_status > GetStatus() || new_status == kStatusError || !Runtime::Current()->IsStarted())
       << PrettyClass(this) << " " << GetStatus() << " -> " << new_status;