Merge "Shadow frame support for MIR->LLVM-IR conversion" into ics-mr1-plus-art
diff --git a/src/compiler_llvm/method_compiler.cc b/src/compiler_llvm/method_compiler.cc
index 4021f7b..6c1e194 100644
--- a/src/compiler_llvm/method_compiler.cc
+++ b/src/compiler_llvm/method_compiler.cc
@@ -274,8 +274,15 @@
 
   // Allocate the shadow frame now!
   shadow_frame_size_ = 0;
+  uint16_t arg_reg_start = code_item_->registers_size_ - code_item_->ins_size_;
   if (method_info_.need_shadow_frame_entry) {
     for (uint32_t i = 0, num_of_regs = code_item_->registers_size_; i < num_of_regs; ++i) {
+      if (i >= arg_reg_start && !method_info_.set_to_another_object[i]) {
+      // If an argument register is never set to another object, it needs no shadow frame
+      // entry: the argument object is already rooted in the caller's shadow frame.
+        continue;
+      }
+
       if (IsRegCanBeObject(i)) {
         reg_to_shadow_frame_index_[i] = shadow_frame_size_++;
       }
@@ -4209,8 +4216,10 @@
                          (code_item_->registers_size_ - code_item_->ins_size_);
   bool has_invoke = false;
   bool may_have_loop = false;
-  bool modify_this = false;
   bool may_throw_exception = false;
+  bool assume_this_non_null = false;
+  std::vector<bool>& set_to_another_object = method_info_.set_to_another_object;
+  set_to_another_object.resize(code_item_->registers_size_, false);
 
   Instruction const* insn;
   for (uint32_t dex_pc = 0;
@@ -4229,16 +4238,16 @@
     case Instruction::MOVE_WIDE:
     case Instruction::MOVE_WIDE_FROM16:
     case Instruction::MOVE_WIDE_16:
+    case Instruction::MOVE_RESULT:
+    case Instruction::MOVE_RESULT_WIDE:
+      break;
+
     case Instruction::MOVE_OBJECT:
     case Instruction::MOVE_OBJECT_FROM16:
     case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
     case Instruction::MOVE_RESULT_OBJECT:
     case Instruction::MOVE_EXCEPTION:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
+      set_to_another_object[dec_insn.vA] = true;
       break;
 
     case Instruction::RETURN_VOID:
@@ -4251,13 +4260,13 @@
     case Instruction::CONST_16:
     case Instruction::CONST:
     case Instruction::CONST_HIGH16:
+      set_to_another_object[dec_insn.vA] = true;
+      break;
+
     case Instruction::CONST_WIDE_16:
     case Instruction::CONST_WIDE_32:
     case Instruction::CONST_WIDE:
     case Instruction::CONST_WIDE_HIGH16:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::CONST_STRING:
@@ -4266,16 +4275,12 @@
       if (!compiler_->CanAssumeStringIsPresentInDexCache(dex_cache_, dec_insn.vB)) {
         may_throw_exception = true;
       }
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
+      set_to_another_object[dec_insn.vA] = true;
       break;
 
     case Instruction::CONST_CLASS:
       may_throw_exception = true;
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
+      set_to_another_object[dec_insn.vA] = true;
       break;
 
     case Instruction::MONITOR_ENTER:
@@ -4284,14 +4289,15 @@
       may_throw_exception = true;
       break;
 
-    case Instruction::INSTANCE_OF:
     case Instruction::ARRAY_LENGTH:
+      may_throw_exception = true;
+      break;
+
+    case Instruction::INSTANCE_OF:
     case Instruction::NEW_INSTANCE:
     case Instruction::NEW_ARRAY:
       may_throw_exception = true;
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
+      set_to_another_object[dec_insn.vA] = true;
       break;
 
     case Instruction::FILLED_NEW_ARRAY:
@@ -4314,16 +4320,11 @@
 
     case Instruction::PACKED_SWITCH:
     case Instruction::SPARSE_SWITCH:
-      break;
-
     case Instruction::CMPL_FLOAT:
     case Instruction::CMPG_FLOAT:
     case Instruction::CMPL_DOUBLE:
     case Instruction::CMPG_DOUBLE:
     case Instruction::CMP_LONG:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::IF_EQ:
@@ -4362,8 +4363,8 @@
     case Instruction::AGET_CHAR:
     case Instruction::AGET_SHORT:
       may_throw_exception = true;
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
+      if (insn->Opcode() == Instruction::AGET_OBJECT) {
+        set_to_another_object[dec_insn.vA] = true;
       }
       break;
 
@@ -4385,8 +4386,8 @@
     case Instruction::IGET_CHAR:
     case Instruction::IGET_SHORT:
       {
-        if (dec_insn.vA == this_reg_idx) {
-          modify_this = true;
+        if (insn->Opcode() == Instruction::IGET_OBJECT) {
+          set_to_another_object[dec_insn.vA] = true;
         }
         uint32_t reg_idx = dec_insn.vB;
         uint32_t field_idx = dec_insn.vC;
@@ -4399,6 +4400,7 @@
         } else if (reg_idx != this_reg_idx) {
           // NullPointerException
           may_throw_exception = true;
+          assume_this_non_null = true;
         }
       }
       break;
@@ -4422,6 +4424,7 @@
         } else if (reg_idx != this_reg_idx) {
           // NullPointerException
           may_throw_exception = true;
+          assume_this_non_null = true;
         }
       }
       break;
@@ -4434,8 +4437,8 @@
     case Instruction::SGET_CHAR:
     case Instruction::SGET_SHORT:
       {
-        if (dec_insn.vA == this_reg_idx) {
-          modify_this = true;
+        if (insn->Opcode() == Instruction::SGET_OBJECT) {
+          set_to_another_object[dec_insn.vA] = true;
         }
         uint32_t field_idx = dec_insn.vB;
 
@@ -4549,9 +4552,6 @@
     case Instruction::SHL_LONG_2ADDR:
     case Instruction::SHR_LONG_2ADDR:
     case Instruction::USHR_LONG_2ADDR:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::DIV_INT:
@@ -4563,9 +4563,6 @@
     case Instruction::DIV_LONG_2ADDR:
     case Instruction::REM_LONG_2ADDR:
       may_throw_exception = true;
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::ADD_FLOAT:
@@ -4588,9 +4585,6 @@
     case Instruction::MUL_DOUBLE_2ADDR:
     case Instruction::DIV_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE_2ADDR:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::ADD_INT_LIT16:
@@ -4608,10 +4602,8 @@
     case Instruction::SHL_INT_LIT8:
     case Instruction::SHR_INT_LIT8:
     case Instruction::USHR_INT_LIT8:
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
+
     case Instruction::DIV_INT_LIT16:
     case Instruction::DIV_INT_LIT8:
     case Instruction::REM_INT_LIT16:
@@ -4619,9 +4611,6 @@
       if (dec_insn.vC == 0) {
         may_throw_exception = true;
       }
-      if (dec_insn.vA == this_reg_idx) {
-        modify_this = true;
-      }
       break;
 
     case Instruction::THROW_VERIFICATION_ERROR:
@@ -4674,13 +4663,15 @@
   // According to the statistics, there are few methods that modify the "this" pointer. So this is a
   // simple way to avoid data flow analysis. After we have a high-level IR before IRBuilder, we
   // should remove this trick.
-  method_info_.this_will_not_be_null = !modify_this;
+  method_info_.this_will_not_be_null =
+      (oat_compilation_unit_->IsStatic()) ? (true) : (!set_to_another_object[this_reg_idx]);
   method_info_.has_invoke = has_invoke;
   // If this method has loop or invoke instruction, it may suspend. Thus we need a shadow frame entry
   // for GC.
   method_info_.need_shadow_frame_entry = has_invoke || may_have_loop;
   // If this method may throw an exception, we need a shadow frame for stack trace (dexpc).
-  method_info_.need_shadow_frame = method_info_.need_shadow_frame_entry || may_throw_exception;
+  method_info_.need_shadow_frame = method_info_.need_shadow_frame_entry || may_throw_exception ||
+                                   (assume_this_non_null && !method_info_.this_will_not_be_null);
   // If can only throw exception, but can't suspend check (no loop, no invoke),
   // then there is no shadow frame entry. Only Shadow frame is needed.
   method_info_.lazy_push_shadow_frame =
diff --git a/src/compiler_llvm/method_compiler.h b/src/compiler_llvm/method_compiler.h
index c30ba3d..ec27d96 100644
--- a/src/compiler_llvm/method_compiler.h
+++ b/src/compiler_llvm/method_compiler.h
@@ -439,6 +439,7 @@
     bool need_shadow_frame_entry;
     bool need_shadow_frame;
     bool lazy_push_shadow_frame;
+    std::vector<bool> set_to_another_object;
   };
   MethodInfo method_info_;
 
diff --git a/src/heap.cc b/src/heap.cc
index 5129a41..22fb3ca 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -30,6 +30,7 @@
 #include "object_utils.h"
 #include "os.h"
 #include "scoped_heap_lock.h"
+#include "ScopedLocalRef.h"
 #include "space.h"
 #include "stl_util.h"
 #include "thread_list.h"
@@ -140,8 +141,13 @@
       card_table_(NULL),
       card_marking_disabled_(false),
       is_gc_running_(false),
+      concurrent_start_size_(128 * KB),
+      concurrent_min_free_(256 * KB),
+      try_running_gc_(false),
+      requesting_gc_(false),
       num_bytes_allocated_(0),
       num_objects_allocated_(0),
+      last_trim_time_(0),
       reference_referent_offset_(0),
       reference_queue_offset_(0),
       reference_queueNext_offset_(0),
@@ -245,6 +251,9 @@
   // but we can create the heap lock now. We don't create it earlier to
   // make it clear that you can't use locks during heap initialization.
   lock_ = new Mutex("Heap lock", kHeapLock);
+  condition_ = new ConditionVariable("Heap condition variable");
+
+  concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
@@ -266,6 +275,7 @@
   delete live_bitmap_;
   delete card_table_;
   delete mark_stack_;
+  delete condition_;
   delete lock_;
 }
 
@@ -296,6 +306,13 @@
     DCHECK_GE(byte_count, sizeof(Object));
     Object* obj = AllocateLocked(byte_count);
     if (obj != NULL) {
+      // Disable CMS until bug is fixed
+      if (false) {
+        if (!is_gc_running_ && num_bytes_allocated_ >= concurrent_start_bytes_) {
+          RequestConcurrentGC();
+        }
+      }
+
       obj->SetClass(c);
       if (Dbg::IsAllocTrackingEnabled()) {
         Dbg::RecordAllocation(c, byte_count);
@@ -597,17 +614,26 @@
     mark_sweep.Init();
     timings.AddSplit("Init");
 
+    if (concurrent) {
+      card_table_->ClearNonImageSpaceCards(this);
+    }
+
     mark_sweep.MarkRoots();
     timings.AddSplit("MarkRoots");
 
+    if (!concurrent) {
+      mark_sweep.ScanDirtyImageRoots();
+      timings.AddSplit("ScanDirtyImageRoots");
+    }
+
     // Roots are marked on the bitmap and the mark_stack is empty
     DCHECK(mark_sweep.IsMarkStackEmpty());
 
     if (concurrent) {
-      timings.AddSplit("RootEnd");
       Unlock();
       thread_list->ResumeAll();
       rootEnd = NanoTime();
+      timings.AddSplit("RootEnd");
     }
 
     // Recursively mark all bits set in the non-image mark bitmap
@@ -623,12 +649,12 @@
       // Re-mark root set.
       mark_sweep.ReMarkRoots();
       timings.AddSplit("ReMarkRoots");
-    }
 
-    // Scan dirty objects, this is required even if we are not doing a
-    // concurrent GC since we use the card table to locate image roots.
-    mark_sweep.RecursiveMarkDirtyObjects();
-    timings.AddSplit("RecursiveMarkDirtyObjects");
+      // Scan the objects recorded as dirty in the card table, re-marking anything
+      // the mutators modified while the world was running concurrently.
+      mark_sweep.RecursiveMarkDirtyObjects();
+      timings.AddSplit("RecursiveMarkDirtyObjects");
+    }
 
     mark_sweep.ProcessReferences(clear_soft_references);
     timings.AddSplit("ProcessReferences");
@@ -667,7 +693,7 @@
     duration_ns = (duration_ns / 1000) * 1000;
     if (concurrent) {
       uint64_t pauseRootsTime = (rootEnd - t0) / 1000 * 1000;
-      uint64_t pauseDirtyTime = (t1 - dirtyBegin) / 1000 * 1000;
+      uint64_t pauseDirtyTime = (dirtyEnd - dirtyBegin) / 1000 * 1000;
       LOG(INFO) << "GC freed " << PrettySize(bytes_freed) << ", " << GetPercentFree() << "% free, "
                 << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory()) << ", "
                 << "paused " << PrettyDuration(pauseRootsTime) << "+" << PrettyDuration(pauseDirtyTime)
@@ -688,6 +714,15 @@
 
 void Heap::WaitForConcurrentGcToComplete() {
   lock_->AssertHeld();
+
+  // Block on the heap condition variable until the concurrent GC finishes.
+  if (is_gc_running_) {
+    uint64_t waitStart = NanoTime();
+    do {
+      condition_->Wait(*lock_);
+    } while (is_gc_running_);
+    LOG(INFO) << "WaitForConcurrentGcToComplete blocked for " << PrettyDuration(NanoTime() - waitStart);
+  }
 }
 
 void Heap::DumpForSigQuit(std::ostream& os) {
@@ -731,6 +766,14 @@
     target_size = num_bytes_allocated_ + kHeapMinFree;
   }
 
+  // Calculate when to perform the next ConcurrentGC.
+  if (GetTotalMemory() - num_bytes_allocated_ < concurrent_min_free_) {
+    // Not enough free memory to perform concurrent GC.
+    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
+  } else {
+    concurrent_start_bytes_ = alloc_space_->GetFootprintLimit() - concurrent_start_size_;
+  }
+
   SetIdealFootprint(target_size);
 }
 
@@ -854,6 +897,33 @@
   }
 }
 
+void Heap::RequestConcurrentGC() {
+  // Make sure that our Daemon threads are started
+  if (requesting_gc_ || !Runtime::Current()->IsFinishedStarting()) {
+    return;
+  }
+
+  requesting_gc_ = true;
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, WellKnownClasses::java_lang_Daemons_requestGC);
+  CHECK(!env->ExceptionCheck());
+  requesting_gc_ = false;
+}
+
+void Heap::ConcurrentGC() {
+  ScopedHeapLock heap_lock;
+  WaitForConcurrentGcToComplete();
+  // Current thread needs to be runnable or else we can't suspend all threads.
+  ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
+  CollectGarbageInternal(true, false);
+  condition_->Broadcast(); // Wake up anyone blocked in WaitForConcurrentGcToComplete.
+}
+
+void Heap::Trim() {
+  lock_->AssertHeld();
+  GetAllocSpace()->Trim();
+}
+
 void Heap::RequestHeapTrim() {
   // We don't have a good measure of how worthwhile a trim might be. We can't use the live bitmap
   // because that only marks object heads, so a large array looks like lots of empty space. We
@@ -862,15 +932,17 @@
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
   float utilization = static_cast<float>(num_bytes_allocated_) / alloc_space_->Size();
-  if (utilization > 0.75f) {
-    // Don't bother trimming the heap if it's more than 75% utilized.
-    // (This percentage was picked arbitrarily.)
+  uint64_t ms_time = NsToMs(NanoTime());
+  if (utilization > 0.75f || ms_time - last_trim_time_ < 2 * 1000) {
+    // Don't bother trimming the heap if it's more than 75% utilized, or if a
+    // heap trim occurred in the last two seconds.
     return;
   }
-  if (!Runtime::Current()->IsStarted()) {
-    // Heap trimming isn't supported without a Java runtime (such as at dex2oat time)
+  if (!Runtime::Current()->IsFinishedStarting()) {
+    // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
     return;
   }
+  last_trim_time_ = ms_time;
   JNIEnv* env = Thread::Current()->GetJniEnv();
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, WellKnownClasses::java_lang_Daemons_requestHeapTrim);
   CHECK(!env->ExceptionCheck());
diff --git a/src/heap.h b/src/heap.h
index de3caa2..a6fb4d4 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -84,6 +84,10 @@
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references);
 
+  // Does a concurrent GC, should only be called by the GC daemon thread
+  // through runtime.
+  void ConcurrentGC();
+
   // Implements java.lang.Runtime.maxMemory.
   int64_t GetMaxMemory();
   // Implements java.lang.Runtime.totalMemory.
@@ -218,8 +222,21 @@
     return alloc_space_;
   }
 
+  size_t GetConcurrentStartSize() const { return concurrent_start_size_; }
+
+  void SetConcurrentStartSize(size_t size) {
+    concurrent_start_size_ = size;
+  }
+
+  size_t GetConcurrentMinFree() const { return concurrent_min_free_; }
+
+  void SetConcurrentMinFree(size_t size) {
+    concurrent_min_free_ = size;
+  }
+
   void DumpForSigQuit(std::ostream& os);
 
+  void Trim();
  private:
   // Allocates uninitialized storage.
   Object* AllocateLocked(size_t num_bytes);
@@ -229,6 +246,7 @@
   void EnqueueClearedReferences(Object** cleared_references);
 
   void RequestHeapTrim();
+  void RequestConcurrentGC();
 
   void RecordAllocationLocked(AllocSpace* space, const Object* object);
   void RecordImageAllocations(Space* space);
@@ -251,6 +269,7 @@
   static void VerificationCallback(Object* obj, void* arg);
 
   Mutex* lock_;
+  ConditionVariable* condition_;
 
   std::vector<Space*> spaces_;
 
@@ -272,6 +291,19 @@
   // True while the garbage collector is running.
   bool is_gc_running_;
 
+  // Bytes until concurrent GC
+  size_t concurrent_start_bytes_;
+  size_t concurrent_start_size_;
+  size_t concurrent_min_free_;
+
+  // True while the garbage collector is trying to signal the GC daemon thread.
+  // This flag prevents recursion when the JNI call used to signal the daemon
+  // thread itself allocates memory and would request another GC.
+  bool try_running_gc_;
+
+  // Used to ensure that we don't ever recursively request GC.
+  bool requesting_gc_;
+
   // Mark stack that we reuse to avoid re-allocating the mark stack
   MarkStack* mark_stack_;
 
@@ -281,6 +313,9 @@
   // Number of objects allocated.  Adjusted after each allocation and free.
   size_t num_objects_allocated_;
 
+  // Last trim time
+  uint64_t last_trim_time_;
+
   // offset of java.lang.ref.Reference.referent
   MemberOffset reference_referent_offset_;
 
diff --git a/src/mark_sweep.cc b/src/mark_sweep.cc
index d070a57..45ad0fe 100644
--- a/src/mark_sweep.cc
+++ b/src/mark_sweep.cc
@@ -58,9 +58,6 @@
   live_bitmap_ = heap_->GetLiveBits();
   mark_stack_->Reset();
 
-  // TODO: if concurrent, clear the card table.
-  heap_->GetCardTable()->ClearNonImageSpaceCards(heap_);
-
   // TODO: if concurrent, enable card marking in compiler
 
   // TODO: check that the mark bitmap is entirely clear.
@@ -204,9 +201,11 @@
   void* arg = reinterpret_cast<void*>(this);
   const std::vector<Space*>& spaces = heap_->GetSpaces();
   for (size_t i = 0; i < spaces.size(); ++i) {
-    uintptr_t begin = reinterpret_cast<uintptr_t>(spaces[i]->Begin());
-    uintptr_t end = reinterpret_cast<uintptr_t>(spaces[i]->End());
-    mark_bitmap_->ScanWalk(begin, end, &MarkSweep::ScanBitmapCallback, arg);
+    if (!spaces[i]->IsImageSpace()) {
+      uintptr_t begin = reinterpret_cast<uintptr_t>(spaces[i]->Begin());
+      uintptr_t end = reinterpret_cast<uintptr_t>(spaces[i]->End());
+      mark_bitmap_->ScanWalk(begin, end, &MarkSweep::ScanBitmapCallback, arg);
+    }
   }
   finger_ = reinterpret_cast<Object*>(~0);
   // TODO: tune the frequency of emptying the mark stack
diff --git a/src/native/dalvik_system_VMRuntime.cc b/src/native/dalvik_system_VMRuntime.cc
index a500f6a..09ca251 100644
--- a/src/native/dalvik_system_VMRuntime.cc
+++ b/src/native/dalvik_system_VMRuntime.cc
@@ -166,7 +166,8 @@
   size_t alloc_space_size = heap->GetAllocSpace()->Size();
   float utilization = static_cast<float>(heap->GetBytesAllocated()) / alloc_space_size;
   uint64_t start_ns = NanoTime();
-  heap->GetAllocSpace()->Trim();
+
+  heap->Trim();
 
   // Trim the native heap.
   dlmalloc_trim(0);
@@ -181,11 +182,16 @@
             << " heap with " << static_cast<int>(100 * utilization) << "% utilization";
 }
 
+static void VMRuntime_concurrentGC(JNIEnv*, jobject) {
+  Runtime::Current()->GetHeap()->ConcurrentGC();
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMRuntime, addressOf, "(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, classPath, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, clearGrowthLimit, "()V"),
+  NATIVE_METHOD(VMRuntime, concurrentGC, "()V"),
   NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"),
   NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"),
   NATIVE_METHOD(VMRuntime, isDebuggerActive, "()Z"),
diff --git a/src/runtime.cc b/src/runtime.cc
index 784a3f6..dda8f62 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -63,12 +63,14 @@
       class_linker_(NULL),
       signal_catcher_(NULL),
       java_vm_(NULL),
+      pre_allocated_OutOfMemoryError_(NULL),
       jni_stub_array_(NULL),
       abstract_method_error_stub_array_(NULL),
       resolution_method_(NULL),
       system_class_loader_(NULL),
       shutting_down_(false),
       started_(false),
+      finished_starting_(false),
       vfprintf_(NULL),
       exit_(NULL),
       abort_(NULL),
@@ -81,7 +83,7 @@
     resolution_stub_array_[i] = NULL;
   }
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_method_[i] = NULL;
+    callee_save_methods_[i] = NULL;
   }
 }
 
@@ -548,11 +550,18 @@
 
   CHECK(host_prefix_.empty()) << host_prefix_;
 
-  // Relocate the OatFiles (ELF images)
+  // Relocate the OatFiles (ELF images).
   class_linker_->RelocateExecutable();
 
-  // Restore main thread state to kNative as expected by native code
-  Thread::Current()->SetState(kNative);
+  // Restore main thread state to kNative as expected by native code.
+  Thread* self = Thread::Current();
+  self->SetState(kNative);
+
+  // Pre-allocate an OutOfMemoryError for the double-OOME case.
+  self->ThrowNewException("Ljava/lang/OutOfMemoryError;",
+                          "OutOfMemoryError thrown while trying to throw OutOfMemoryError; no stack available");
+  pre_allocated_OutOfMemoryError_ = self->GetException();
+  self->ClearException();
 
   started_ = true;
 
@@ -570,9 +579,11 @@
 
   CreateSystemClassLoader();
 
-  Thread::Current()->GetJniEnv()->locals.AssertEmpty();
+  self->GetJniEnv()->locals.AssertEmpty();
 
   VLOG(startup) << "Runtime::Start exiting";
+
+  finished_starting_ = true;
 }
 
 void Runtime::DidForkFromZygote() {
@@ -848,6 +859,9 @@
   intern_table_->VisitRoots(visitor, arg);
   java_vm_->VisitRoots(visitor, arg);
   thread_list_->VisitRoots(visitor, arg);
+  if (pre_allocated_OutOfMemoryError_ != NULL) {
+    visitor(pre_allocated_OutOfMemoryError_, arg);
+  }
   visitor(jni_stub_array_, arg);
   visitor(abstract_method_error_stub_array_, arg);
   for (int i = 0; i < Runtime::kLastTrampolineMethodType; i++) {
@@ -855,7 +869,7 @@
   }
   visitor(resolution_method_, arg);
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    visitor(callee_save_method_[i], arg);
+    visitor(callee_save_methods_[i], arg);
   }
 }
 
@@ -940,7 +954,7 @@
 
 void Runtime::SetCalleeSaveMethod(Method* method, CalleeSaveType type) {
   DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType));
-  callee_save_method_[type] = method;
+  callee_save_methods_[type] = method;
 }
 
 void Runtime::EnableMethodTracing(Trace* tracer) {
diff --git a/src/runtime.h b/src/runtime.h
index 55dab07..9181f1f 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -48,6 +48,7 @@
 class SignalCatcher;
 class String;
 class ThreadList;
+class Throwable;
 class Trace;
 
 class Runtime {
@@ -116,6 +117,10 @@
     return started_;
   }
 
+  bool IsFinishedStarting() const {
+    return finished_starting_;
+  }
+
   static Runtime* Current() {
     return instance_;
   }
@@ -168,14 +173,18 @@
     return java_vm_;
   }
 
-  const std::vector<std::string>& GetProperties() const {
-    return properties_;
-  }
-
   MonitorList* GetMonitorList() const {
     return monitor_list_;
   }
 
+  Throwable* GetPreAllocatedOutOfMemoryError() {
+    return pre_allocated_OutOfMemoryError_;
+  }
+
+  const std::vector<std::string>& GetProperties() const {
+    return properties_;
+  }
+
   ThreadList* GetThreadList() const {
     return thread_list_;
   }
@@ -251,12 +260,12 @@
   };
 
   bool HasCalleeSaveMethod(CalleeSaveType type) const {
-    return callee_save_method_[type] != NULL;
+    return callee_save_methods_[type] != NULL;
   }
 
   Method* GetCalleeSaveMethod(CalleeSaveType type) const {
     CHECK(HasCalleeSaveMethod(type));
-    return callee_save_method_[type];
+    return callee_save_methods_[type];
   }
 
   void SetCalleeSaveMethod(Method* method, CalleeSaveType type);
@@ -355,13 +364,15 @@
 
   JavaVMExt* java_vm_;
 
+  Throwable* pre_allocated_OutOfMemoryError_;
+
   ByteArray* jni_stub_array_;
 
   ByteArray* abstract_method_error_stub_array_;
 
   ByteArray* resolution_stub_array_[kLastTrampolineMethodType];
 
-  Method* callee_save_method_[kLastCalleeSaveType];
+  Method* callee_save_methods_[kLastCalleeSaveType];
 
   Method* resolution_method_;
 
@@ -371,6 +382,11 @@
   bool shutting_down_;
   bool started_;
 
+  // Tells us whether the runtime has finished starting. Once set, the daemon
+  // threads and the system class loader have been created, so it is safe to
+  // request a concurrent (CMS) collection.
+  bool finished_starting_;
+
   // Hooks supported by JNI_CreateJavaVM
   jint (*vfprintf_)(FILE* stream, const char* format, va_list ap);
   void (*exit_)(jint status);
diff --git a/src/runtime_linux.cc b/src/runtime_linux.cc
index a4bdacc..df66162 100644
--- a/src/runtime_linux.cc
+++ b/src/runtime_linux.cc
@@ -251,8 +251,8 @@
   // TODO: instead, get debuggerd running on the host, try to connect, and hang around on success.
   if (getenv("debug_db_uid") != NULL) {
     LOG(INTERNAL_FATAL) << "********************************************************\n"
-                        << "* Process " << getpid() << " has been suspended while crashing. Attach gdb:\n"
-                        << "*     gdb -p " << getpid() << "\n"
+                        << "* Process " << getpid() << " thread " << GetTid() << " has been suspended while crashing. Attach gdb:\n"
+                        << "*     gdb -p " << GetTid() << "\n"
                         << "********************************************************\n";
     // Wait for debugger to attach.
     while (true) {
diff --git a/src/thread.cc b/src/thread.cc
index e1764ef..b263039 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -339,17 +339,10 @@
     DecodeField(WellKnownClasses::java_lang_Thread_priority)->SetInt(peer_, thread_priority);
     peer_thread_name.reset(GetThreadName());
   }
-  // thread_name may have been null, so don't trust this to be non-null
+  // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
   if (peer_thread_name.get() != NULL) {
     SetThreadName(peer_thread_name->ToModifiedUtf8().c_str());
   }
-
-  // Pre-allocate an OutOfMemoryError for the double-OOME case.
-  ThrowNewException("Ljava/lang/OutOfMemoryError;",
-      "OutOfMemoryError thrown while trying to throw OutOfMemoryError; no stack available");
-  ScopedLocalRef<jthrowable> exception(env, env->ExceptionOccurred());
-  env->ExceptionClear();
-  pre_allocated_OutOfMemoryError_ = Decode<Throwable*>(env, exception.get());
 }
 
 void Thread::SetThreadName(const char* name) {
@@ -756,7 +749,9 @@
     delay = new_delay;
     if (delay == 0) {
       sched_yield();
-      delay = 10000;
+      // Reset to 500us; the delay is doubled before each sleep, so the
+      // first actual sleep after a yield is 1 millisecond.
+      delay = 500;
     } else {
       usleep(delay);
       total_delay += delay;
@@ -840,7 +835,6 @@
       class_loader_override_(NULL),
       long_jump_context_(NULL),
       throwing_OutOfMemoryError_(false),
-      pre_allocated_OutOfMemoryError_(NULL),
       debug_invoke_req_(new DebugInvokeReq),
       trace_stack_(new std::vector<TraceStackFrame>),
       name_(new std::string(kThreadNameDuringStartup)) {
@@ -1485,12 +1479,12 @@
     throwing_OutOfMemoryError_ = true;
     ThrowNewException("Ljava/lang/OutOfMemoryError;", NULL);
   } else {
-    SetException(pre_allocated_OutOfMemoryError_);
+    Dump(LOG(ERROR)); // The pre-allocated OOME has no stack, so help out and log one.
+    SetException(Runtime::Current()->GetPreAllocatedOutOfMemoryError());
   }
   throwing_OutOfMemoryError_ = false;
 }
 
-
 Thread* Thread::CurrentFromGdb() {
   return Thread::Current();
 }
@@ -1885,9 +1879,6 @@
   if (peer_ != NULL) {
     visitor(peer_, arg);
   }
-  if (pre_allocated_OutOfMemoryError_ != NULL) {
-    visitor(pre_allocated_OutOfMemoryError_, arg);
-  }
   if (class_loader_override_ != NULL) {
     visitor(class_loader_override_, arg);
   }
diff --git a/src/thread.h b/src/thread.h
index 6a2b27e..ef539c0 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -628,8 +628,6 @@
   // A boolean telling us whether we're recursively throwing OOME.
   uint32_t throwing_OutOfMemoryError_;
 
-  Throwable* pre_allocated_OutOfMemoryError_;
-
   // JDWP invoke-during-breakpoint support.
   DebugInvokeReq* debug_invoke_req_;
 
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 0dc06f8..a1e9d2f 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -23,6 +23,7 @@
 #include "debugger.h"
 #include "scoped_heap_lock.h"
 #include "scoped_thread_list_lock.h"
+#include "timing_logger.h"
 #include "utils.h"
 
 namespace art {
@@ -159,7 +160,6 @@
   CHECK_EQ(self->GetState(), kRunnable);
   ScopedThreadListLock thread_list_lock;
   Thread* debug_thread = Dbg::GetDebugThread();
-
   {
     // Increment everybody's suspend count (except our own).
     MutexLock mu(thread_suspend_count_lock_);
diff --git a/src/thread_list.h b/src/thread_list.h
index 9e45bfb..dc6e820 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -22,6 +22,8 @@
 
 namespace art {
 
+class TimingLogger;
+
 class ThreadList {
  public:
   static const uint32_t kMaxThreadId = 0xFFFF;
diff --git a/src/utils.cc b/src/utils.cc
index 87c8704..084456d 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -958,7 +958,7 @@
     os << prefix << "(unwind_backtrace_thread failed for thread " << tid << ")\n";
     return;
   } else if (frame_count == 0) {
-    os << prefix << "(no native stack frames)\n";
+    os << prefix << "(no native stack frames for thread " << tid << ")\n";
     return;
   }
 
diff --git a/src/well_known_classes.cc b/src/well_known_classes.cc
index 20e71d5..a0397ff 100644
--- a/src/well_known_classes.cc
+++ b/src/well_known_classes.cc
@@ -45,6 +45,7 @@
 jmethodID WellKnownClasses::com_android_dex_Dex_create;
 jmethodID WellKnownClasses::java_lang_ClassNotFoundException_init;
 jmethodID WellKnownClasses::java_lang_ClassLoader_loadClass;
+jmethodID WellKnownClasses::java_lang_Daemons_requestGC;
 jmethodID WellKnownClasses::java_lang_Daemons_requestHeapTrim;
 jmethodID WellKnownClasses::java_lang_Daemons_start;
 jmethodID WellKnownClasses::java_lang_ref_FinalizerReference_add;
@@ -123,6 +124,8 @@
   com_android_dex_Dex_create = CacheMethod(env, com_android_dex_Dex, true, "create", "(Ljava/nio/ByteBuffer;)Lcom/android/dex/Dex;");
   java_lang_ClassNotFoundException_init = CacheMethod(env, java_lang_ClassNotFoundException, false, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
   java_lang_ClassLoader_loadClass = CacheMethod(env, java_lang_ClassLoader, false, "loadClass", "(Ljava/lang/String;)Ljava/lang/Class;");
+
+  java_lang_Daemons_requestGC = CacheMethod(env, java_lang_Daemons, true, "requestGC", "()V");
   java_lang_Daemons_requestHeapTrim = CacheMethod(env, java_lang_Daemons, true, "requestHeapTrim", "()V");
   java_lang_Daemons_start = CacheMethod(env, java_lang_Daemons, true, "start", "()V");
 
diff --git a/src/well_known_classes.h b/src/well_known_classes.h
index 6a6dd60..d2c4959 100644
--- a/src/well_known_classes.h
+++ b/src/well_known_classes.h
@@ -53,6 +53,7 @@
   static jmethodID com_android_dex_Dex_create;
   static jmethodID java_lang_ClassLoader_loadClass;
   static jmethodID java_lang_ClassNotFoundException_init;
+  static jmethodID java_lang_Daemons_requestGC;
   static jmethodID java_lang_Daemons_requestHeapTrim;
   static jmethodID java_lang_Daemons_start;
   static jmethodID java_lang_ref_FinalizerReference_add;