ART can't create more than 8k threads during its lifetime

ART uses the LDT to point to the Java Thread structure. The LDT
can hold up to 8k pointers, so we can have 8k threads simultaneously.
But the current implementation does not free the slots of finished threads.
This means no more than 8k threads can be created during the whole
life cycle.
This patch implements a mechanism for freeing LDT slots.

Change-Id: Ifcf8fe1f4434a13f940146fff39b9c7bf91ee17b
Signed-off-by: Alexei Zavjalov <alexei.zavjalov@intel.com>
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 8c1efeb..df4a04a 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -28,4 +28,8 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  // Intentionally a no-op in the ARM port; nothing is released here.
+}
+
 }  // namespace art
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index bd54549..f5d211f 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -27,4 +27,8 @@
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
+void Thread::CleanupCpu() {
+  // Intentionally a no-op in the MIPS port; nothing is released here.
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 39bad58..235da99 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,8 +40,9 @@
 
 namespace art {
 
+static Mutex modify_ldt_lock("modify_ldt lock");
+
 void Thread::InitCpu() {
-  static Mutex modify_ldt_lock("modify_ldt lock");
   MutexLock mu(Thread::Current(), modify_ldt_lock);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
@@ -113,7 +114,6 @@
   uint16_t table_indicator = 1 << 2;  // LDT
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
-  // TODO: use our assembler to generate code
   __asm__ __volatile__("movw %w0, %%fs"
       :    // output
       : "q"(selector)  // input
@@ -124,7 +124,6 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  // TODO: use our assembler to generate code
   CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
@@ -138,4 +137,36 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  MutexLock mu(Thread::Current(), modify_ldt_lock);  // Same mutex InitCpu() holds.
+
+  // Sanity check: the value read at %fs:THREAD_SELF_OFFSET must equal this Thread*.
+  Thread* self_check;
+  __asm__ __volatile__("movl %%fs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Read the %fs selector; InitCpu() packed the LDT entry number into its bits 3 and up.
+  uint16_t selector;
+  __asm__ __volatile__("movw %%fs, %w0"
+      : "=q"(selector)  // output
+      :  // input
+      :);  // clobber
+
+  // Mark the LDT entry (selector >> 3) as free so finished threads do not use up slots.
+#if defined(__APPLE__)
+  i386_set_ldt(selector >> 3, 0, 1);  // NOTE(review): null descriptor presumably frees it; confirm.
+#else
+  user_desc ldt_entry;
+  memset(&ldt_entry, 0, sizeof(ldt_entry));
+  ldt_entry.entry_number = selector >> 3;
+  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  ldt_entry.seg_not_present = 1;  // All other fields stay zero from the memset above.
+
+  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));  // NOTE(review): result unchecked.
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 68f3c04..c3ef228 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1046,9 +1046,11 @@
   }
   opeer_ = nullptr;
 
-  delete jni_env_;
-  jni_env_ = nullptr;
-
+  bool initialized = (jni_env_ != nullptr);  // Did Thread::Init run?
+  if (initialized) {
+    delete jni_env_;
+    jni_env_ = nullptr;
+  }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
   CHECK(checkpoint_functions_[0] == nullptr);
@@ -1065,6 +1067,10 @@
     delete long_jump_context_;
   }
 
+  if (initialized) {
+    CleanupCpu();
+  }
+
   delete debug_invoke_req_;
   delete single_step_control_;
   delete instrumentation_stack_;
diff --git a/runtime/thread.h b/runtime/thread.h
index 0810909..4a37ca0 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -619,6 +619,7 @@
   void Init(ThreadList*, JavaVMExt*) EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
   void InitCardTable();
   void InitCpu();
+  void CleanupCpu();
   void InitTlsEntryPoints();
   void InitTid();
   void InitPthreadKeySelf();