Merge change 5967 into donut * changes: Modifies OpenSSLSocketImpl to use a different lock for the instance count. It was using the same lock when use around native methods meaning that the finalizer could be blocked unnecessarily resulting in a VM crash.

commit: ab4b7f704313075838517ffec400dc5b6250c67c [log] [tgz]
author: Android (Google) Code Review <android-gerrit@google.com> Thu Jul 02 10:17:51 2009 -0700
committer: Android (Google) Code Review <android-gerrit@google.com> Thu Jul 02 10:17:51 2009 -0700
tree: f9e5c188d7eebda5f224df1bd5638040e7169cc7
parent: a8e9edde867b945b20133d31b4b7c21eaba6dfd4 [diff]
parent: f681a0fc7db58aeead2d6970a717d8a466c1b516 [diff]
diff --git a/libcore/luni-kernel/src/main/native/java_lang_ProcessManager.c b/libcore/luni-kernel/src/main/native/java_lang_ProcessManager.c
index ee2fc58..fcff1df 100644
--- a/libcore/luni-kernel/src/main/native/java_lang_ProcessManager.c
+++ b/libcore/luni-kernel/src/main/native/java_lang_ProcessManager.c

@@ -16,6 +16,7 @@
 
 #define LOG_TAG "ProcessManager"
 
+#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
@@ -24,7 +25,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
-#include <dirent.h>
 
 #include "jni.h"
 #include "JNIHelp.h"
@@ -155,31 +155,21 @@
     }
 }
 
-/** Close all open fds > 2 (i.e. everything but stdin/out/err). */
+/** Close all open fds > 2 (i.e. everything but stdin/out/err), != skipFd. */
 static void closeNonStandardFds(int skipFd) {
-    DIR* dir = opendir("/proc/self/fd");
+    struct rlimit rlimit;
+    getrlimit(RLIMIT_NOFILE, &rlimit);
 
-    if (dir == NULL) {
-        // Print message to standard err. The parent process can read this
-        // from Process.getErrorStream().
-        perror("opendir");
-        return;
-    }
-
-    struct dirent* entry;
-    int dirFd = dirfd(dir);
-    while ((entry = readdir(dir)) != NULL) {
-        int fd = atoi(entry->d_name);
-        if (fd > 2 && fd != dirFd && fd != skipFd
+    int fd;
+    for (fd = 3; fd < rlimit.rlim_max; fd++) {
+        if (fd != skipFd
 #ifdef ANDROID
                 && fd != androidSystemPropertiesFd
 #endif
                 ) {
             close(fd);
-        }        
+        }
     }
-
-    closedir(dir);
 }
 
 #define PIPE_COUNT (4) // number of pipes used to communicate with child proc
@@ -233,6 +223,12 @@
 
     // If this is the child process...
     if (childPid == 0) {
+        /*
+         * Note: We cannot malloc() or free() after this point!
+         * A no-longer-running thread may be holding on to the heap lock, and
+         * an attempt to malloc() or free() would result in deadlock.
+         */
+
         // Replace stdin, out, and err with pipes.
         dup2(stdinIn, 0);
         dup2(stdoutOut, 1);

diff --git a/vm/Debugger.c b/vm/Debugger.c
index c667893..3affa97 100644
--- a/vm/Debugger.c
+++ b/vm/Debugger.c

@@ -2697,10 +2697,10 @@
     dvmUnlockThreadList();
 
     /*
-     * We change our thread status (which should be THREAD_RUNNING) so the
-     * VM can suspend for a GC if the invoke request causes us to run out
-     * of memory.  It's also a good idea to change it before locking the
-     * invokeReq mutex, although that should never be held for long.
+     * We change our (JDWP thread) status, which should be THREAD_RUNNING,
+     * so the VM can suspend for a GC if the invoke request causes us to
+     * run out of memory.  It's also a good idea to change it before locking
+     * the invokeReq mutex, although that should never be held for long.
      */
     Thread* self = dvmThreadSelf();
     int oldStatus = dvmChangeStatus(self, THREAD_VMWAIT);
@@ -2774,18 +2774,25 @@
 
 /*
  * Execute the method described by "*pReq".
+ *
+ * We're currently in VMWAIT, because we're stopped on a breakpoint.  We
+ * want to switch to RUNNING while we execute.
  */
 void dvmDbgExecuteMethod(DebugInvokeReq* pReq)
 {
     Thread* self = dvmThreadSelf();
     const Method* meth;
     Object* oldExcept;
+    int oldStatus;
 
     /*
      * We can be called while an exception is pending in the VM.  We need
      * to preserve that across the method invocation.
      */
     oldExcept = dvmGetException(self);
+    dvmClearException(self);
+
+    oldStatus = dvmChangeStatus(self, THREAD_RUNNING);
 
     /*
      * Translate the method through the vtable, unless we're calling a
@@ -2832,6 +2839,7 @@
 
     if (oldExcept != NULL)
         dvmSetException(self, oldExcept);
+    dvmChangeStatus(self, oldStatus);
 }
 
 // for dvmAddressSetForLine

diff --git a/vm/Globals.h b/vm/Globals.h
index 79b9e91..a2de271 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h

@@ -321,7 +321,7 @@
 
     /*
      * The thread code grabs this before suspending all threads.  There
-     * are four things that can cause a "suspend all":
+     * are a few things that can cause a "suspend all":
      *  (1) the GC is starting;
      *  (2) the debugger has sent a "suspend all" request;
      *  (3) a thread has hit a breakpoint or exception that the debugger
@@ -332,9 +332,6 @@
      * do a blocking "lock" call on this mutex -- if it has been acquired,
      * somebody is probably trying to put you to sleep.  The leading '_' is
      * intended as a reminder that this lock is special.
-     *
-     * This lock is also held while attaching an externally-created thread
-     * through JNI.  That way we can correctly set the initial suspend state.
      */
     pthread_mutex_t _threadSuspendLock;
 

diff --git a/vm/Native.c b/vm/Native.c
index 7a153d6..dcf68b6 100644
--- a/vm/Native.c
+++ b/vm/Native.c

@@ -423,9 +423,21 @@
      *   - check to see if the library is valid
      *   - check config/prelink-linux-arm.map to ensure that the library
      *     is listed and is not being overrun by the previous entry (if
-     *     loading suddenly stops working, this is a good one to check)
+     *     loading suddenly stops working on a prelinked library, this is
+     *     a good one to check)
+     *   - write a trivial app that calls sleep() then dlopen(), attach
+     *     to it with "strace -p <pid>" while it sleeps, and watch for
+     *     attempts to open nonexistent dependent shared libs
+     *
+     * This can execute slowly for a large library on a busy system, so we
+     * want to switch from RUNNING to VMWAIT while it executes.  This allows
+     * the GC to ignore us.
      */
+    Thread* self = dvmThreadSelf();
+    int oldStatus = dvmChangeStatus(self, THREAD_VMWAIT);
     handle = dlopen(pathName, RTLD_LAZY);
+    dvmChangeStatus(self, oldStatus);
+
     if (handle == NULL) {
         LOGI("Unable to dlopen(%s): %s\n", pathName, dlerror());
         return false;
@@ -461,9 +473,9 @@
             Object* prevOverride = self->classLoaderOverride;
 
             self->classLoaderOverride = classLoader;
-            dvmChangeStatus(NULL, THREAD_NATIVE);
+            oldStatus = dvmChangeStatus(self, THREAD_NATIVE);
             version = (*func)(gDvm.vmList, NULL);
-            dvmChangeStatus(NULL, THREAD_RUNNING);
+            dvmChangeStatus(self, oldStatus);
             self->classLoaderOverride = prevOverride;
 
             if (version != JNI_VERSION_1_2 && version != JNI_VERSION_1_4 &&

diff --git a/vm/Thread.c b/vm/Thread.c
index 0adde38..ee195eb 100644
--- a/vm/Thread.c
+++ b/vm/Thread.c

@@ -481,6 +481,20 @@
     assert(cc == 0);
 }
 
+/*
+ * Convert SuspendCause to a string.
+ */
+static const char* getSuspendCauseStr(SuspendCause why)
+{
+    switch (why) {
+    case SUSPEND_NOT:               return "NOT?";
+    case SUSPEND_FOR_GC:            return "gc";
+    case SUSPEND_FOR_DEBUG:         return "debug";
+    case SUSPEND_FOR_DEBUG_EVENT:   return "debug-event";
+    case SUSPEND_FOR_STACK_DUMP:    return "stack-dump";
+    default:                        return "UNKNOWN";
+    }
+}
 
 /*
  * Grab the "thread suspend" lock.  This is required to prevent the
@@ -492,7 +506,6 @@
  */
 static void lockThreadSuspend(const char* who, SuspendCause why)
 {
-    const int kMaxRetries = 10;
     const int kSpinSleepTime = 3*1000*1000;        /* 3s */
     u8 startWhen = 0;       // init req'd to placate gcc
     int sleepIter = 0;
@@ -503,23 +516,37 @@
         if (cc != 0) {
             if (!dvmCheckSuspendPending(NULL)) {
                 /*
-                 * Could be unusual JNI-attach thing, could be we hit
-                 * the window as the suspend or resume was started.  Could
-                 * also be the debugger telling us to resume at roughly
+                 * Could be that a resume-all is in progress, and something
+                 * grabbed the CPU when the wakeup was broadcast.  The thread
+                 * performing the resume hasn't had a chance to release the
+                 * thread suspend lock.  (Should no longer be an issue --
+                 * we now release before broadcast.)
+                 *
+                 * Could be we hit the window as a suspend was started,
+                 * and the lock has been grabbed but the suspend counts
+                 * haven't been incremented yet.
+                 *
+                 * Could be an unusual JNI thread-attach thing.
+                 *
+                 * Could be the debugger telling us to resume at roughly
                  * the same time we're posting an event.
                  */
-                LOGI("threadid=%d ODD: thread-suspend lock held (%s:%d)"
-                     " but suspend not pending\n",
-                    dvmThreadSelf()->threadId, who, why);
+                LOGI("threadid=%d ODD: want thread-suspend lock (%s:%s),"
+                     " it's held, no suspend pending\n",
+                    dvmThreadSelf()->threadId, who, getSuspendCauseStr(why));
+            } else {
+                /* we suspended; reset timeout */
+                sleepIter = 0;
             }
 
             /* give the lock-holder a chance to do some work */
             if (sleepIter == 0)
                 startWhen = dvmGetRelativeTimeUsec();
             if (!dvmIterativeSleep(sleepIter++, kSpinSleepTime, startWhen)) {
-                LOGE("threadid=%d: couldn't get thread-suspend lock (%s:%d),"
+                LOGE("threadid=%d: couldn't get thread-suspend lock (%s:%s),"
                      " bailing\n",
-                    dvmThreadSelf()->threadId, who, why);
+                    dvmThreadSelf()->threadId, who, getSuspendCauseStr(why));
+                /* threads are not suspended, thread dump could crash */
                 dvmDumpAllThreads(false);
                 dvmAbort();
             }
@@ -1224,9 +1251,13 @@
      */
     dvmUnlockThreadList();
 
-    if (pthread_create(&threadHandle, &threadAttr, interpThreadStart,
-            newThread) != 0)
-    {
+    int cc, oldStatus;
+    oldStatus = dvmChangeStatus(self, THREAD_VMWAIT);
+    cc = pthread_create(&threadHandle, &threadAttr, interpThreadStart,
+            newThread);
+    oldStatus = dvmChangeStatus(self, oldStatus);
+
+    if (cc != 0) {
         /*
          * Failure generally indicates that we have exceeded system
          * resource limits.  VirtualMachineError is probably too severe,
@@ -1271,7 +1302,8 @@
      *
      * The easiest way to deal with this is to prevent the new thread from
      * running until the parent says it's okay.  This results in the
-     * following sequence of events for a "badly timed" GC:
+     * following (correct) sequence of events for a "badly timed" GC
+     * (where '-' is us, 'o' is the child, and '+' is some other thread):
      *
      *  - call pthread_create()
      *  - lock thread list
@@ -2202,8 +2234,15 @@
                 &gDvm.threadSuspendCountLock);
         assert(cc == 0);
         if (self->suspendCount != 0) {
-            LOGD("threadid=%d: still suspended after undo (s=%d d=%d)\n",
-                self->threadId, self->suspendCount, self->dbgSuspendCount);
+            /*
+             * The condition was signaled but we're still suspended.  This
+             * can happen if the debugger lets go while a SIGQUIT thread
+             * dump event is pending (assuming SignalCatcher was resumed for
+             * just long enough to try to grab the thread-suspend lock).
+             */
+            LOGD("threadid=%d: still suspended after undo (sc=%d dc=%d s=%c)\n",
+                self->threadId, self->suspendCount, self->dbgSuspendCount,
+                self->isSuspended ? 'Y' : 'N');
         }
     }
     assert(self->suspendCount == 0 && self->dbgSuspendCount == 0);
@@ -2308,6 +2347,7 @@
 {
     const int kMaxRetries = 10;
     const int kSpinSleepTime = 750*1000;        /* 0.75s */
+    bool complained = false;
 
     int sleepIter = 0;
     int retryCount = 0;
@@ -2322,6 +2362,7 @@
                 self->threadId, (int)self->handle,
                 thread->threadId, (int)thread->handle);
             dumpWedgedThread(thread);
+            complained = true;
 
             // keep going; could be slow due to valgrind
             sleepIter = 0;
@@ -2334,6 +2375,11 @@
             }
         }
     }
+
+    if (complained) {
+        LOGW("threadid=%d: spin on suspend resolved\n", self->threadId);
+        //dvmDumpThread(thread, false);   /* suspended, so dump is safe */
+    }
 }
 
 /*
@@ -2470,7 +2516,9 @@
         /* debugger events don't suspend JDWP thread */
         if ((why == SUSPEND_FOR_DEBUG || why == SUSPEND_FOR_DEBUG_EVENT) &&
             thread->handle == dvmJdwpGetDebugThread(gDvm.jdwpState))
+        {
             continue;
+        }
 
         if (thread->suspendCount > 0) {
             thread->suspendCount--;
@@ -2485,6 +2533,43 @@
     dvmUnlockThreadList();
 
     /*
+     * In some ways it makes sense to continue to hold the thread-suspend
+     * lock while we issue the wakeup broadcast.  It allows us to complete
+     * one operation before moving on to the next, which simplifies the
+     * thread activity debug traces.
+     *
+     * This approach caused us some difficulty under Linux, because the
+     * condition variable broadcast not only made the threads runnable,
+     * but actually caused them to execute, and it was a while before
+     * the thread performing the wakeup had an opportunity to release the
+     * thread-suspend lock.
+     *
+     * This is a problem because, when a thread tries to acquire that
+     * lock, it times out after 3 seconds.  If at some point the thread
+     * is told to suspend, the clock resets; but since the VM is still
+     * theoretically mid-resume, there's no suspend pending.  If, for
+     * example, the GC was waking threads up while the SIGQUIT handler
+     * was trying to acquire the lock, we would occasionally time out on
+     * a busy system and SignalCatcher would abort.
+     *
+     * We now perform the unlock before the wakeup broadcast.  The next
+     * suspend can't actually start until the broadcast completes and
+     * returns, because we're holding the thread-suspend-count lock, but the
+     * suspending thread is now able to make progress and we avoid the abort.
+     *
+     * (Technically there is a narrow window between when we release
+     * the thread-suspend lock and grab the thread-suspend-count lock.
+     * This could cause us to send a broadcast to threads with nonzero
+     * suspend counts, but this is expected and they'll all just fall
+     * right back to sleep.  It's probably safe to grab the suspend-count
+     * lock before releasing thread-suspend, since we're still following
+     * the correct order of acquisition, but it feels weird.)
+     */
+
+    LOG_THREAD("threadid=%d: ResumeAll waking others\n", self->threadId);
+    unlockThreadSuspend();
+
+    /*
      * Broadcast a notification to all suspended threads, some or all of
      * which may choose to wake up.  No need to wait for them.
      */
@@ -2493,8 +2578,6 @@
     assert(cc == 0);
     unlockThreadSuspendCount();
 
-    unlockThreadSuspend();
-
     LOG_THREAD("threadid=%d: ResumeAll complete\n", self->threadId);
 }
 
@@ -2962,9 +3045,9 @@
         threadName, isDaemon ? " daemon" : "",
         priority, thread->threadId, kStatusNames[thread->status]);
     dvmPrintDebugMessage(target,
-        "  | group=\"%s\" sCount=%d dsCount=%d s=%d obj=%p\n",
+        "  | group=\"%s\" sCount=%d dsCount=%d s=%c obj=%p self=%p\n",
         groupName, thread->suspendCount, thread->dbgSuspendCount,
-        thread->isSuspended, thread->threadObj);
+        thread->isSuspended ? 'Y' : 'N', thread->threadObj, thread);
     dvmPrintDebugMessage(target,
         "  | sysTid=%d nice=%d sched=%d/%d handle=%d\n",
         thread->systemTid, getpriority(PRIO_PROCESS, thread->systemTid),

diff --git a/vm/interp/Stack.c b/vm/interp/Stack.c
index 103d2b4..d5c5fe9 100644
--- a/vm/interp/Stack.c
+++ b/vm/interp/Stack.c

@@ -351,7 +351,8 @@
 
 #ifndef NDEBUG
     if (self->status != THREAD_RUNNING) {
-        LOGW("Status=%d on call to %s.%s -\n", self->status,
+        LOGW("threadid=%d: status=%d on call to %s.%s -\n",
+            self->threadId, self->status,
             method->clazz->descriptor, method->name);
     }
 #endif
commit	ab4b7f704313075838517ffec400dc5b6250c67c	[log] [tgz]
author	Android (Google) Code Review <android-gerrit@google.com>	Thu Jul 02 10:17:51 2009 -0700
committer	Android (Google) Code Review <android-gerrit@google.com>	Thu Jul 02 10:17:51 2009 -0700
tree	f9e5c188d7eebda5f224df1bd5638040e7169cc7
parent	a8e9edde867b945b20133d31b4b7c21eaba6dfd4 [diff]
parent	f681a0fc7db58aeead2d6970a717d8a466c1b516 [diff]