Merge "Performance improvement for mapping table creation."
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index af93a56..ef9a9ce 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -633,8 +633,7 @@
ScopedThreadStateChange tsc(self, kBlocked);
if (lock_word == obj->GetLockWord()) { // If lock word hasn't changed.
bool timed_out;
- Thread* owner = thread_list->SuspendThreadByThreadId(lock_word.ThinLockOwner(), false,
- &timed_out);
+ Thread* owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
if (owner != nullptr) {
// We succeeded in suspending the thread, check the lock's status didn't change.
lock_word = obj->GetLockWord();
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6bd2560..4048bd3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -131,11 +131,6 @@
heap_->WaitForGcToComplete(self);
heap_->DeleteThreadPool();
- // For RosAlloc, revoke thread local runs. Note that in tests
- // (common_test.h) we repeat allocating and deleting Runtime
- // objects.
- heap_->RevokeAllThreadLocalBuffers();
-
// Make sure our internal threads are dead before we start tearing down things they're using.
Dbg::StopJdwp();
delete signal_catcher_;
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 8449607..e47fd37 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -50,7 +50,8 @@
// old_state_and_flags.suspend_request is true.
DCHECK_NE(new_state, kRunnable);
DCHECK_EQ(this, Thread::Current());
- union StateAndFlags old_state_and_flags = state_and_flags_;
+ union StateAndFlags old_state_and_flags;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
state_and_flags_.as_struct.state = new_state;
return static_cast<ThreadState>(old_state_and_flags.as_struct.state);
}
@@ -87,7 +88,7 @@
union StateAndFlags old_state_and_flags;
union StateAndFlags new_state_and_flags;
do {
- old_state_and_flags = state_and_flags_;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
if (UNLIKELY((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0)) {
RunCheckpointFunction();
continue;
@@ -104,22 +105,23 @@
inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
bool done = false;
- union StateAndFlags old_state_and_flags = state_and_flags_;
+ union StateAndFlags old_state_and_flags;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
int16_t old_state = old_state_and_flags.as_struct.state;
DCHECK_NE(static_cast<ThreadState>(old_state), kRunnable);
do {
Locks::mutator_lock_->AssertNotHeld(this); // Otherwise we starve GC.
- old_state_and_flags = state_and_flags_;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
if (UNLIKELY((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0)) {
// Wait while our suspend count is non-zero.
MutexLock mu(this, *Locks::thread_suspend_count_lock_);
- old_state_and_flags = state_and_flags_;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
// Re-check when Thread::resume_cond_ is notified.
Thread::resume_cond_->Wait(this);
- old_state_and_flags = state_and_flags_;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
}
DCHECK_EQ(GetSuspendCount(), 0);
@@ -127,10 +129,11 @@
// Re-acquire shared mutator_lock_ access.
Locks::mutator_lock_->SharedLock(this);
// Atomically change from suspended to runnable if no suspend request pending.
- old_state_and_flags = state_and_flags_;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
if (LIKELY((old_state_and_flags.as_struct.flags & kSuspendRequest) == 0)) {
- union StateAndFlags new_state_and_flags = old_state_and_flags;
+ union StateAndFlags new_state_and_flags;
+ new_state_and_flags.as_int = old_state_and_flags.as_int;
new_state_and_flags.as_struct.state = kRunnable;
// CAS the value without a memory barrier; the barrier already occurred in the lock above.
done = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
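
Note on the thread-inl.h changes above: every whole-union assignment of StateAndFlags is replaced by an explicit copy through the as_int view. This works around a gcc miscompilation of assignments involving unions with volatile members (gcc bug 47409, cited in the thread.h hunk below). A minimal standalone sketch of the pattern, not part of the patch (StateAndFlags32 is a hypothetical trimmed-down copy of the real union in runtime/thread.h):

#include <cstdint>

union StateAndFlags32 {
  struct {
    volatile uint16_t flags;
    volatile uint16_t state;
  } as_struct;
  volatile int32_t as_int;
};

int main() {
  StateAndFlags32 shared;  // Imagine other threads write this concurrently.
  shared.as_int = 0;

  StateAndFlags32 copy;
  // Miscompiled by the gcc bug: whole-union assignment when the union
  // has volatile members.
  //   copy = shared;
  // The pattern the patch switches to: a single volatile 32-bit load.
  copy.as_int = shared.as_int;
  return copy.as_struct.state;
}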
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d816ca1..fa49faa 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -579,19 +579,21 @@
}
bool Thread::RequestCheckpoint(Closure* function) {
- union StateAndFlags old_state_and_flags = state_and_flags_;
+ union StateAndFlags old_state_and_flags;
+ old_state_and_flags.as_int = state_and_flags_.as_int;
if (old_state_and_flags.as_struct.state != kRunnable) {
return false; // Fail, thread is suspended and so can't run a checkpoint.
}
if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
return false; // Fail, already a checkpoint pending.
}
- CHECK(checkpoint_function_ == NULL);
+ CHECK(checkpoint_function_ == nullptr);
checkpoint_function_ = function;
// Checkpoint function installed, now install the flag bit.
// We must be runnable to request a checkpoint.
- old_state_and_flags.as_struct.state = kRunnable;
- union StateAndFlags new_state_and_flags = old_state_and_flags;
+ DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
+ union StateAndFlags new_state_and_flags;
+ new_state_and_flags.as_int = old_state_and_flags.as_int;
new_state_and_flags.as_struct.flags |= kCheckpointRequest;
int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
&state_and_flags_.as_int);
@@ -2120,12 +2122,11 @@
opeer_ = visitor(opeer_, arg);
}
if (exception_ != nullptr) {
- exception_ = reinterpret_cast<mirror::Throwable*>(visitor(exception_, arg));
+ exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg));
}
throw_location_.VisitRoots(visitor, arg);
if (class_loader_override_ != nullptr) {
- class_loader_override_ = reinterpret_cast<mirror::ClassLoader*>(
- visitor(class_loader_override_, arg));
+ class_loader_override_ = down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg));
}
jni_env_->locals.VisitRoots(visitor, arg);
jni_env_->monitors.VisitRoots(visitor, arg);
@@ -2144,7 +2145,7 @@
frame.this_object_ = visitor(frame.this_object_, arg);
}
DCHECK(frame.method_ != nullptr);
- frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+ frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
}
}
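
Note on the VisitRoots changes above: the visitor returns a plain mirror::Object*, and the results are now narrowed with down_cast instead of reinterpret_cast, so an impossible conversion fails to compile rather than silently reinterpreting bits. A minimal sketch of a down_cast-style helper, written here with C++11 traits for brevity; assume the real helper lives in the runtime's casts header and may differ in detail:

#include <type_traits>

// A static_cast that only compiles when To* really is a pointer to a
// class derived from From.
template <typename To, typename From>
inline To down_cast(From* f) {
  static_assert(
      std::is_base_of<From, typename std::remove_pointer<To>::type>::value,
      "down_cast requires To to derive from From");
  return static_cast<To>(f);
}

struct Object {};
struct Throwable : Object {};

Object* Visit(Object* root) { return root; }  // Stand-in for the visitor.

int main() {
  Throwable* t = nullptr;
  // Compiles because Throwable derives from Object; with an unrelated
  // type this line would be rejected, unlike reinterpret_cast.
  t = down_cast<Throwable*>(Visit(t));
  return t == nullptr ? 0 : 1;
}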
diff --git a/runtime/thread.h b/runtime/thread.h
index db2f7b4..44b2186 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -147,7 +147,8 @@
}
bool IsSuspended() const {
- union StateAndFlags state_and_flags = state_and_flags_;
+ union StateAndFlags state_and_flags;
+ state_and_flags.as_int = state_and_flags_.as_int;
return state_and_flags.as_struct.state != kRunnable &&
(state_and_flags.as_struct.flags & kSuspendRequest) != 0;
}
@@ -638,7 +639,8 @@
// 32 bits of atomically changed state and flags. Keeping as 32 bits allows an atomic CAS to
// change from being Suspended to Runnable without a suspend request occurring.
- union StateAndFlags {
+ union PACKED(4) StateAndFlags {
+ StateAndFlags() {}
struct PACKED(4) {
// Bitfield of flag values. Must be changed atomically so that flag values aren't lost. See
// ThreadFlags for bit field meanings.
@@ -650,6 +652,11 @@
volatile uint16_t state;
} as_struct;
volatile int32_t as_int;
+
+ private:
+ // gcc does not handle assignments of structs with volatile members correctly.
+ // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47409
+ DISALLOW_COPY_AND_ASSIGN(StateAndFlags);
};
union StateAndFlags state_and_flags_;
COMPILE_ASSERT(sizeof(union StateAndFlags) == sizeof(int32_t),
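
Note on the thread.h hunk above: besides PACKED(4) on the union itself, the patch adds a user-provided default constructor and makes StateAndFlags non-copyable, so the assignment pattern gcc miscompiles cannot be reintroduced; every copy must go through as_int. A sketch of the idea with the macro written out in its classic pre-C++11 form (assumed here; the project's actual definition lives in its base macros header):

#include <cstdint>

// Declaring the copy operations private and leaving them undefined turns
// whole-union assignment into a compile-time (or link-time) error.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)

union Example {
  // Needed explicitly: declaring the (private) copy constructor
  // suppresses the implicitly generated default constructor.
  Example() {}
  struct {
    volatile uint16_t flags;
    volatile uint16_t state;
  } as_struct;
  volatile int32_t as_int;

 private:
  DISALLOW_COPY_AND_ASSIGN(Example);
};

int main() {
  Example a, b;
  a.as_int = 0;
  b.as_int = a.as_int;  // OK: explicit copy through the int view.
  // b = a;             // Does not compile: copy assignment is private.
  return b.as_int;
}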
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index dd3f11c..aed8c77 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -162,6 +162,35 @@
}
#endif
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep and total_delay_us
+// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
+// subsequent sleeps increase delay_us from 1ms to 500ms by doubling.
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us,
+ bool holding_locks) {
+ if (!holding_locks) {
+ for (int i = kLockLevelCount - 1; i >= 0; --i) {
+ BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
+ if (held_mutex != NULL) {
+ LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
+ }
+ }
+ }
+ useconds_t new_delay_us = (*delay_us) * 2;
+ CHECK_GE(new_delay_us, *delay_us);
+ if (new_delay_us < 500000) { // Don't allow sleeping to be more than 0.5s.
+ *delay_us = new_delay_us;
+ }
+ if (*delay_us == 0) {
+ sched_yield();
+ // Default to 1 millisecond (note that this gets multiplied by 2 before the first sleep).
+ *delay_us = 500;
+ } else {
+ usleep(*delay_us);
+ *total_delay_us += *delay_us;
+ }
+}
+
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
Thread* self = Thread::Current();
if (kIsDebugBuild) {
@@ -208,17 +237,15 @@
for (const auto& thread : suspended_count_modified_threads) {
if (!thread->IsSuspended()) {
// Wait until the thread is suspended.
- uint64_t start = NanoTime();
+ useconds_t total_delay_us = 0;
do {
- // Sleep for 100us.
- usleep(100);
+ useconds_t delay_us = 100;
+ ThreadSuspendSleep(self, &delay_us, &total_delay_us, true);
} while (!thread->IsSuspended());
- uint64_t end = NanoTime();
- // Shouldn't need to wait for longer than 1 millisecond.
- const uint64_t threshold = 1;
- if (NsToMs(end - start) > threshold) {
- LOG(INFO) << "Warning: waited longer than " << threshold
- << " ms for thread suspend\n";
+ // Shouldn't need to wait for longer than 1000 microseconds.
+ constexpr useconds_t kLongWaitThresholdUS = 1000;
+ if (UNLIKELY(total_delay_us > kLongWaitThresholdUS)) {
+ LOG(WARNING) << "Waited " << total_delay_us << " us for thread suspend!";
}
}
// We know for sure that the thread is suspended at this point.
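
Note on ThreadSuspendSleep: it moves above RunCheckpoint so the checkpoint wait loop can reuse it, and gains a holding_locks parameter because that caller legitimately holds locks while waiting (the held-mutex check would otherwise LOG(FATAL)). The schedule it implements: the first call just yields and primes delay_us at 500, each later call doubles the delay, and no single sleep may reach 0.5s. A standalone simulation of the progression, with the yield, usleep() and held-mutex check elided:

#include <cstdint>
#include <cstdio>

static void StepDelay(uint32_t* delay_us, uint32_t* total_delay_us) {
  uint32_t new_delay_us = (*delay_us) * 2;
  if (new_delay_us < 500000) {  // No single sleep may reach 0.5s.
    *delay_us = new_delay_us;
  }
  if (*delay_us == 0) {
    // First call: the real code sched_yield()s here, then primes the
    // delay so the doubling above makes the first real sleep 1ms.
    *delay_us = 500;
  } else {
    *total_delay_us += *delay_us;  // Accumulated against kTimeoutUs (30s).
  }
}

int main() {
  uint32_t delay_us = 0;
  uint32_t total_delay_us = 0;
  for (int i = 0; i < 12; ++i) {
    StepDelay(&delay_us, &total_delay_us);
    std::printf("step %2d: delay=%u us, total=%u us\n", i, delay_us, total_delay_us);
  }
  return 0;
}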
@@ -354,34 +381,6 @@
}
}
-// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
-// individual thread requires polling. delay_us is the requested sleep and total_delay_us
-// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
-// subsequently sleeps increase delay_us from 1ms to 500ms by doubling.
-static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
- for (int i = kLockLevelCount - 1; i >= 0; --i) {
- BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
- if (held_mutex != NULL) {
- LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
- }
- }
- {
- useconds_t new_delay_us = (*delay_us) * 2;
- CHECK_GE(new_delay_us, *delay_us);
- if (new_delay_us < 500000) { // Don't allow sleeping to be more than 0.5s.
- *delay_us = new_delay_us;
- }
- }
- if ((*delay_us) == 0) {
- sched_yield();
- // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
- (*delay_us) = 500;
- } else {
- usleep(*delay_us);
- (*total_delay_us) += (*delay_us);
- }
-}
-
Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
bool debug_suspension, bool* timed_out) {
static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
@@ -432,7 +431,7 @@
}
// Release locks and come out of runnable state.
}
- ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+ ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
}
}
@@ -445,13 +444,13 @@
static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
useconds_t total_delay_us = 0;
useconds_t delay_us = 0;
- bool did_suspend_request = false;
*timed_out = false;
+ Thread* suspended_thread = nullptr;
Thread* self = Thread::Current();
CHECK_NE(thread_id, kInvalidThreadId);
while (true) {
- Thread* thread = NULL;
{
+ Thread* thread = NULL;
ScopedObjectAccess soa(self);
MutexLock mu(self, *Locks::thread_list_lock_);
for (const auto& it : list_) {
@@ -460,17 +459,20 @@
break;
}
}
- if (thread == NULL) {
+ if (thread == nullptr) {
+ CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
+ << " no longer in thread list";
// There's a race between inflating a lock and the owner giving up ownership and then dying.
ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
return NULL;
}
{
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
- if (!did_suspend_request) {
+ if (suspended_thread == nullptr) {
thread->ModifySuspendCount(self, +1, debug_suspension);
- did_suspend_request = true;
+ suspended_thread = thread;
} else {
+ CHECK_EQ(suspended_thread, thread);
// If the caller isn't requesting suspension, a suspension should have already occurred.
CHECK_GT(thread->GetSuspendCount(), 0);
}
@@ -487,7 +489,7 @@
}
if (total_delay_us >= kTimeoutUs) {
ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
- if (did_suspend_request) {
+ if (suspended_thread != nullptr) {
thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
}
*timed_out = true;
@@ -496,7 +498,7 @@
}
// Release locks and come out of runnable state.
}
- ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+ ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
}
}
@@ -719,9 +721,7 @@
self->Destroy();
uint32_t thin_lock_id = self->thin_lock_thread_id_;
- self->thin_lock_thread_id_ = 0;
- ReleaseThreadId(self, thin_lock_id);
- while (self != NULL) {
+ while (self != nullptr) {
// Remove and delete the Thread* while holding the thread_list_lock_ and
// thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
// Note: deliberately not using MutexLock that could hold a stale self pointer.
@@ -732,10 +732,14 @@
if (!self->IsSuspended()) {
list_.remove(self);
delete self;
- self = NULL;
+ self = nullptr;
}
Locks::thread_list_lock_->ExclusiveUnlock(self);
}
+ // Release the thread ID after the thread is finished and deleted to avoid cases where we can
+ // temporarily have multiple threads with the same thread id. When this occurs, it causes
+ // problems in FindThreadByThreadId / SuspendThreadByThreadId.
+ ReleaseThreadId(nullptr, thin_lock_id);
// Clear the TLS data, so that the underlying native thread is recognizably detached.
// (It may wish to reattach later.)
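
Note on the Unregister change above, which underpins the SuspendThreadByThreadId fix: previously the thin lock id was released before the dying Thread left list_, so a starting thread could be handed the same id while both were registered, and a suspend-by-id could find and modify the wrong thread. Tracking suspended_thread as a Thread* (rather than the old did_suspend_request flag) also lets the retry loop detect a mismatch with CHECK_EQ(suspended_thread, thread). A much-simplified, hypothetical model of the id-reuse window (FakeThread, Model and the single-slot free list are inventions for illustration):

#include <cassert>
#include <list>

struct FakeThread { unsigned id; };

struct Model {
  std::list<FakeThread*> list_;
  unsigned next_id_ = 1;
  unsigned free_id_ = 0;  // Single-slot free list, enough for the demo.

  unsigned AllocThreadId() {
    if (free_id_ != 0) { unsigned id = free_id_; free_id_ = 0; return id; }
    return next_id_++;
  }
  void ReleaseThreadId(unsigned id) { free_id_ = id; }

  FakeThread* FindThreadByThreadId(unsigned id) {
    for (FakeThread* t : list_) { if (t->id == id) return t; }
    return nullptr;
  }
};

int main() {
  Model m;
  FakeThread dying{m.AllocThreadId()};
  m.list_.push_back(&dying);

  // Old order: release the id while `dying` is still in list_.
  m.ReleaseThreadId(dying.id);
  FakeThread fresh{m.AllocThreadId()};  // Handed the recycled id.
  m.list_.push_back(&fresh);

  // Two registered threads now share an id; a lookup by id no longer
  // identifies a unique thread.
  assert(dying.id == fresh.id);
  assert(m.FindThreadByThreadId(fresh.id) == &dying);  // Wrong thread.

  // New order (this patch): remove and delete the thread first, call
  // ReleaseThreadId last, so an id is never reusable while its owner is
  // still in list_.
  return 0;
}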
diff --git a/test/ThreadStress/ThreadStress.java b/test/ThreadStress/ThreadStress.java
index 8d8135d..795c790 100644
--- a/test/ThreadStress/ThreadStress.java
+++ b/test/ThreadStress/ThreadStress.java
@@ -128,13 +128,13 @@
Thread[] runners = new Thread[numberOfThreads];
for (int r = 0; r < runners.length; r++) {
final ThreadStress ts = threadStresses[r];
- runners[r] = new Thread() {
+ runners[r] = new Thread("Runner thread " + r) {
final ThreadStress threadStress = ts;
public void run() {
int id = threadStress.id;
- System.out.println("Starting runner for " + id);
+ System.out.println("Starting worker for " + id);
while (threadStress.nextOperation < operationsPerThread) {
- Thread thread = new Thread(ts);
+ Thread thread = new Thread(ts, "Worker thread " + id);
thread.start();
try {
thread.join();
@@ -144,14 +144,14 @@
+ (operationsPerThread - threadStress.nextOperation)
+ " operations remaining.");
}
- System.out.println("Finishing runner for " + id);
+ System.out.println("Finishing worker for " + id);
}
};
}
// The notifier thread is a daemon that just loops forever to wake
// up threads in Operation.WAIT.
- Thread notifier = new Thread() {
+ Thread notifier = new Thread("Notifier") {
public void run() {
while (true) {
synchronized (lock) {