Modified Thread.c to allow sampling profiler thread in zygote. Added a function that
queries the native thread status. Modified the profiler to sample every thread at
a fixed interval instead of adjusting the delay based on the number of threads. This
will make it easier to compare results across apps. Added ability to shut down
the profiler thread. Added code to track whether the method was a leaf or a
caller.
diff --git a/libcore/dalvik/src/main/java/dalvik/system/SamplingProfiler.java b/libcore/dalvik/src/main/java/dalvik/system/SamplingProfiler.java
index 694df38..65e56f2 100644
--- a/libcore/dalvik/src/main/java/dalvik/system/SamplingProfiler.java
+++ b/libcore/dalvik/src/main/java/dalvik/system/SamplingProfiler.java
@@ -15,62 +15,78 @@
     private static final Logger logger = Logger.getLogger(
             SamplingProfiler.class.getName());
 
+    static final boolean DEBUG = false;
+
+    enum State {
+        /** The sampling thread hasn't started or is waiting to resume. */
+        PAUSED,
+        /** The sampling thread is collecting samples. */
+        RUNNING,
+        /** The sampling thread is shutting down. */
+        SHUTTING_DOWN
+    }
+
     /** Pointer to native state. */
     int pointer = 0;
 
     /** The thread that collects samples. */
     Thread samplingThread;
 
-    /** Whether or not the profiler is running. */
-    boolean running = false;
-    int delayPerThread; // ms
+    /** Time between samples. */
+    volatile int delay; // ms
 
-    /** Number of samples taken. */
-    int sampleCount = 0;
+    /** Number of samples taken (~samples per second * number of threads). */
+    int totalThreadsSampled = 0;
 
     /** Total time spent collecting samples. */
     long totalSampleTime = 0;
 
+    /** The state of the profiler. */
+    volatile State state = State.PAUSED;
+
     private SamplingProfiler() {}
 
     /**
      * Returns true if the profiler is running.
      */
     public boolean isRunning() {
-        return running;
+        return state == State.RUNNING;
     }
 
     /**
      * Starts collecting samples.
      *
-     * @param threadsPerSecond number of threads to sample per second
+     * @param samplesPerSecond number of times to sample each thread per second
+     * @throws IllegalStateException if the profiler is
+     *  {@linkplain #shutDown()} shutting down}
      */
-    public synchronized void start(int threadsPerSecond) {
-        if (threadsPerSecond < 1) {
-            throw new IllegalArgumentException("threadsPerSecond < 1");
+    public synchronized void start(int samplesPerSecond) {
+        if (samplesPerSecond < 1) {
+            throw new IllegalArgumentException("samplesPerSecond < 1");
         }
-        if (!running) {
-            logger.info("Starting profiler.");
-            running = true;
+        ensureNotShuttingDown();
+        delay = 1000 / samplesPerSecond;
+        if (!isRunning()) {
+            if (DEBUG) logger.info("Starting profiler...");
+            state = State.RUNNING;
             if (samplingThread == null) {
                 // TODO: Priority?
-                samplingThread = new Thread(new Sampler());
+                samplingThread = new Thread(new Sampler(), "SamplingProfiler");
                 samplingThread.setDaemon(true);
                 samplingThread.start();
             } else {
                 notifyAll();
             }
         }
-        delayPerThread = 1000 / threadsPerSecond;
     }
 
     /**
-     * Stops sample collection.
+     * Pauses sample collection.
      */
-    public synchronized void stop() {
-        if (running) {
-            logger.info("Stopping profiler.");
-            running = false;
+    public synchronized void pause() {
+        if (isRunning()) {
+            if (DEBUG) logger.info("Pausing profiler...");
+            state = State.PAUSED;
         }
     }
 
@@ -80,49 +96,116 @@
      *
      * <p>Note: The exact format is not documented because it's not set in
      * stone yet.
+     *
+     * @throws IllegalStateException if the profiler is
+     *  {@linkplain #shutDown()} shutting down}
      */
     public synchronized byte[] snapshot() {
-        if (pointer == 0 || sampleCount == 0) {
+        ensureNotShuttingDown();
+        if (pointer == 0 || totalThreadsSampled == 0) {
             return null;
         }
 
-        int size = size(pointer);
-        int collisions = collisions(pointer);
+        if (DEBUG) {
+            int size = size(pointer);
+            int collisions = collisions(pointer);
 
-        long start = System.nanoTime();
-        byte[] bytes = snapshot(pointer);
-        long elapsed = System.nanoTime() - start;
+            long start = System.nanoTime();
+            byte[] bytes = snapshot(pointer);
+            long elapsed = System.nanoTime() - start;
 
-        logger.info("Grabbed snapshot in " + (elapsed / 1000) + "us."
-                + " Samples collected: " + sampleCount
-                + ", Average sample time (per thread): "
-                    + (((totalSampleTime / sampleCount) << 10) / 1000) + "us"
-                + ", Set size: " + size
-                + ", Collisions: " + collisions);
-        sampleCount = 0;
-        totalSampleTime = 0;
+            long averageSampleTime = ((totalSampleTime / totalThreadsSampled)
+                    << 10) / 1000;
+            logger.info("Grabbed snapshot in " + (elapsed / 1000) + "us."
+                    + " Samples collected: " + totalThreadsSampled
+                    + ", Average sample time (per thread): "
+                            + averageSampleTime + "us"
+                    + ", Set size: " + size
+                    + ", Collisions: " + collisions);
+            totalThreadsSampled = 0;
+            totalSampleTime = 0;
 
-        return bytes;
+            return bytes;
+        } else {
+            totalThreadsSampled = 0;
+            return snapshot(pointer);
+        }
     }
 
     /**
      * Identifies the "event thread". For a user-facing application, this
      * might be the UI thread. For a background process, this might be the
      * thread that processes incoming requests.
+     *
+     * @throws IllegalStateException if the profiler is
+     *  {@linkplain #shutDown()} shutting down}
      */
     public synchronized void setEventThread(Thread eventThread) {
+        ensureNotShuttingDown();
         if (pointer == 0) {
             pointer = allocate();
         }
         setEventThread(pointer, eventThread);
     }
 
+    private void ensureNotShuttingDown() {
+        if (state == State.SHUTTING_DOWN) {
+            throw new IllegalStateException("Profiler is shutting down.");
+        }
+    }
+
+    /**
+     * Shuts down the profiler thread and frees native memory. The profiler
+     * will recreate the thread the next time {@link #start(int)} is called.
+     *
+     * @throws IllegalStateException if the profiler is already shutting down
+     */
+    public void shutDown() {
+        Thread toStop;
+        synchronized (this) {
+            ensureNotShuttingDown();
+
+            toStop = samplingThread;
+            if (toStop == null) {
+                // The profiler hasn't started yet.
+                return;
+            }
+
+            state = State.SHUTTING_DOWN;
+            samplingThread = null;
+            notifyAll();
+        }
+
+        // Release lock to 'this' so background thread can grab it and stop.
+        boolean successful = false;
+        while (!successful) {
+            try {
+                toStop.join();
+                successful = true;
+            } catch (InterruptedException e) { /* ignore */ }
+        }
+
+        synchronized (this) {
+            if (pointer != 0) {
+                free(pointer);
+                pointer = 0;
+            }
+
+            totalThreadsSampled = 0;
+            totalSampleTime = 0;
+            state = State.PAUSED;
+        }
+    }
+
     /** Collects some data. Returns number of threads sampled. */
     private static native int sample(int pointer);
 
     /** Allocates native state. */
     private static native int allocate();
 
+    /** Frees native state. */
+    private static native void free(int pointer);
+
     /** Gets the number of methods in the sample set. */
     private static native int size(int pointer);
 
@@ -142,11 +225,15 @@
         public void run() {
             boolean firstSample = true;
             while (true) {
-                int threadsSampled;
                 synchronized (SamplingProfiler.this) {
-                    if (!running) {
-                        logger.info("Stopped profiler.");
-                        while (!running) {
+                    if (!isRunning()) {
+                        if (DEBUG) logger.info("Paused profiler.");
+                        while (!isRunning()) {
+                            if (state == State.SHUTTING_DOWN) {
+                                // Stop thread.
+                                return;
+                            }
+
                             try {
                                 SamplingProfiler.this.wait();
                             } catch (InterruptedException e) { /* ignore */ }
@@ -159,20 +246,24 @@
                     }
 
                     if (firstSample) {
-                        logger.info("Started profiler.");
+                        if (DEBUG) logger.info("Started profiler.");
                         firstSample = false;
                     }
 
-                    long start = System.nanoTime();
-                    threadsSampled = sample(pointer);
-                    long elapsed = System.nanoTime() - start;
+                    if (DEBUG) {
+                        long start = System.nanoTime();
+                        int threadsSampled = sample(pointer);
+                        long elapsed = System.nanoTime() - start;
 
-                    sampleCount += threadsSampled;
-                    totalSampleTime += elapsed >> 10; // shift avoids overflow.
+                        totalThreadsSampled += threadsSampled;
+                        totalSampleTime += elapsed >> 10; // avoids overflow.
+                    } else {
+                        totalThreadsSampled += sample(pointer);
+                    }
                 }
 
                 try {
-                    Thread.sleep(delayPerThread * threadsSampled);
+                    Thread.sleep(delay);
                 } catch (InterruptedException e) { /* ignore */ }
             }
         }
@@ -211,9 +302,12 @@
 
     private static void appendCounts(DataInputStream in, StringBuilder sb)
             throws IOException {
-        sb.append("      running: ").append(in.readShort()).append('\n');
-        sb.append("      native: ").append(in.readShort()).append('\n');
-        sb.append("      suspended: ").append(in.readShort()).append('\n');
+        sb.append("      running:\n");
+        sb.append("        caller: ").append(in.readShort()).append('\n');
+        sb.append("        leaf: ").append(in.readShort()).append('\n');
+        sb.append("      suspended:\n");
+        sb.append("        caller: ").append(in.readShort()).append('\n');
+        sb.append("        leaf: ").append(in.readShort()).append('\n');
     }
 
     /** This will be allocated when the user calls getInstance(). */
diff --git a/vm/Thread.c b/vm/Thread.c
index 6a1e9aa..4cce9f8 100644
--- a/vm/Thread.c
+++ b/vm/Thread.c
@@ -1295,10 +1295,18 @@
     assert(threadObj != NULL);
 
     if(gDvm.zygote) {
-        dvmThrowException("Ljava/lang/IllegalStateException;",
-            "No new threads in -Xzygote mode");
+        // Allow the sampling profiler thread. We shut it down before forking.
+        StringObject* nameStr = (StringObject*) dvmGetFieldObject(threadObj,
+                    gDvm.offJavaLangThread_name);
+        char* threadName = dvmCreateCstrFromString(nameStr);
+        bool profilerThread = strcmp(threadName, "SamplingProfiler") == 0;
+        free(threadName);
+        if (!profilerThread) {
+            dvmThrowException("Ljava/lang/IllegalStateException;",
+                "No new threads in -Xzygote mode");
 
-        goto fail;
+            goto fail;
+        }
     }
 
     self = dvmThreadSelf();
@@ -2232,6 +2240,11 @@
     dvmUnlockThreadList();
 
     setThreadSelf(NULL);
+
+    if (self->statFile > 0) {
+        close(self->statFile);
+    }
+
     freeThread(self);
 }
 
@@ -3850,3 +3863,115 @@
      */
     gcScanAllThreads();
 }
+
+/* Converts a Linux thread state to a ThreadStatus. */
+static ThreadStatus stateToStatus(char state) {
+    switch (state) {
+        case 'R': return THREAD_RUNNING; // running
+        case 'S': return THREAD_WAIT;    // sleeping in interruptible wait
+        case 'D': return THREAD_WAIT;    // uninterruptible disk sleep
+        case 'Z': return THREAD_ZOMBIE;  // zombie
+        case 'T': return THREAD_WAIT;    // traced or stopped on a signal
+        case 'W': return THREAD_PAGING;  // paging memory
+        default:
+            LOGE("Unexpected state: %c", state);
+            return THREAD_NATIVE;
+    }
+}
+
+/* Reads the state char starting from the beginning of the file. */
+static char readStateFromBeginning(Thread* thread) {
+    char buffer[256];
+    int size = read(thread->statFile, buffer, sizeof(buffer));
+    if (size == 0) {
+        LOGE("EOF trying to read stat file.");
+        return 0;
+    }
+    char* endOfName = (char*) memchr(buffer, ')', sizeof(buffer));
+    if (endOfName == NULL) {
+        LOGE("End of executable name not found.");
+        return 0;
+    }
+    char* state = endOfName + 2;
+    thread->stateOffset = state - buffer;
+    if (*state == 0) {
+        LOGE("State char is 0.");
+    }
+    return *state;
+}
+
+/*
+ * Looks for the state char at the last place we found it. Read from the
+ * beginning if necessary.
+ */
+static char readStateRelatively(Thread* thread) {
+    char buffer[3];
+    // Position file offset at end of executable name.
+    int result = lseek(thread->statFile, thread->stateOffset - 2, SEEK_SET);
+    if (result < 0) {
+        LOGE("lseek() error.");
+        return 0;
+    }
+    int size = read(thread->statFile, buffer, sizeof(buffer));
+    if (size < (int) sizeof(buffer)) {
+        LOGE("EOF trying to read stat file.");
+        return 0;
+    }
+    if (buffer[0] != ')') {
+        // The executable name must have changed.
+        result = lseek(thread->statFile, 0, SEEK_SET);
+        if (result < 0) {
+            LOGE("lseek() error.");
+            return 0;
+        }
+        return readStateFromBeginning(thread);
+    }
+    char state = buffer[2];
+    if (state == 0) {
+        LOGE("State char is 0.");
+    }
+    return state;
+}
+
+ThreadStatus dvmGetNativeThreadStatus(Thread* thread) {
+    // TODO: Add a custom hook so this just reads a char from memory.
+
+    ThreadStatus status = thread->status;
+    if (status != THREAD_NATIVE) {
+        // Return cached status so we don't accidentally return THREAD_NATIVE.
+        return status;
+    }
+
+    if (thread->statFile == -1) {
+        // We tried and failed to open the file earlier. Return current status.
+        return thread->status;
+    }
+
+    // Note: see "man proc" for the format of stat.
+    char state;
+    if (thread->statFile == 0) {
+        // We haven't tried to open the file yet. Do so.
+        char fileName[256];
+        sprintf(fileName, "/proc/self/task/%d/stat", thread->systemTid);
+        thread->statFile = open(fileName, O_RDONLY);
+        if (thread->statFile == NULL) {
+            LOGE("Error opening %s: %s", fileName, strerror(errno));
+            thread->statFile = -1;
+            return thread->status;
+        }
+        state = readStateFromBeginning(thread);
+    } else {
+        state = readStateRelatively(thread);
+    }
+
+    if (state == 0) {
+        close(thread->statFile);
+        thread->statFile = -1;
+        return thread->status;
+    }
+    ThreadStatus nativeStatus = stateToStatus(state);
+
+    // The thread status could have changed from NATIVE.
+    status = thread->status;
+    return status == THREAD_NATIVE ? nativeStatus : status;
+}
diff --git a/vm/Thread.h b/vm/Thread.h
index 50bf5a0..3baf9aa 100644
--- a/vm/Thread.h
+++ b/vm/Thread.h
@@ -51,6 +51,7 @@
     THREAD_STARTING     = 6,        /* started, not yet on thread list */
     THREAD_NATIVE       = 7,        /* off in a JNI native method */
     THREAD_VMWAIT       = 8,        /* waiting on a VM resource */
+    THREAD_PAGING       = 9,        /* paging memory */
 } ThreadStatus;
 
 /* thread priorities, from java.lang.Thread */
@@ -90,7 +91,7 @@
      * Thread's current status.  Can only be changed by the thread itself
      * (i.e. don't mess with this from other threads).
      */
-    ThreadStatus status;
+    volatile ThreadStatus status;
 
     /*
      * This is the number of times the thread has been suspended.  When the
@@ -214,6 +215,11 @@
     /* PC, saved on every instruction; redundant with StackSaveArea */
     const u2*   currentPc2;
 #endif
+
+    /* /proc/PID/task/TID/stat */
+    int statFile;
+    /* offset of state char in stat file, last we checked */
+    int stateOffset;
 } Thread;
 
 /* start point for an internal thread; mimics pthread args */
@@ -434,6 +440,13 @@
 void dvmDumpAllThreads(bool grabLock);
 void dvmDumpAllThreadsEx(const DebugOutputTarget* target, bool grabLock);
 
+/*
+ * Reads the native thread status. If the thread is in native code, this
+ * function queries the native thread state and converts it to an equivalent
+ * ThreadStatus. If the native status can't be read, this function returns
+ * THREAD_NATIVE.
+ */
+ThreadStatus dvmGetNativeThreadStatus(Thread* thread);
 
 #ifdef WITH_MONITOR_TRACKING
 /*
diff --git a/vm/native/dalvik_system_SamplingProfiler.c b/vm/native/dalvik_system_SamplingProfiler.c
index 2381ade..71ef490 100644
--- a/vm/native/dalvik_system_SamplingProfiler.c
+++ b/vm/native/dalvik_system_SamplingProfiler.c
@@ -25,10 +25,10 @@
 #include "Dalvik.h"
 #include "native/InternalNativePriv.h"
 
-// ~16k
+// ~20k
 #define INITIAL_CAPACITY 1024
 
-// ~64k
+// ~80k
 #define MAX_CAPACITY 4096
 
 typedef enum {
@@ -43,20 +43,27 @@
 typedef enum {
     /** Executing bytecode. */
     RUNNING_THREAD,
-    /** In a native method. */
-    NATIVE_THREAD,
     /** Waiting on a lock or VM resource. */
     SUSPENDED_THREAD
 } ThreadState;
 
 #define THREAD_STATE_SIZE (SUSPENDED_THREAD + 1)
 
+typedef enum {
+    /** This method is in the call stack. */
+    CALLING_METHOD,
+    /** VM is in this method. */
+    LEAF_METHOD
+} MethodState;
+
+#define METHOD_STATE_SIZE (LEAF_METHOD + 1)
+
 /** SampleSet entry. */
 typedef struct {
     /** Entry key. */
     const Method* method; // 4 bytes
     /** Sample counts for method divided by thread type and state. */
-    u2 counts[THREAD_TYPE_SIZE][THREAD_STATE_SIZE]; // 12 bytes
+    u2 counts[THREAD_TYPE_SIZE][THREAD_STATE_SIZE][METHOD_STATE_SIZE]; // 16B
 } MethodCount;
 
 /**
@@ -143,7 +150,8 @@
 
 /** Increments counter for method in set. */
 static void countMethod(SampleSet* set, const Method* method,
-        ThreadType threadType, ThreadState threadState) {
+        ThreadType threadType, ThreadState threadState,
+        MethodState methodState) {
     MethodCount* entries = set->entries;
     int start = hash(method) & set->mask;
     int i;
@@ -152,7 +160,7 @@
 
         if (entry->method == method) {
             // We found an existing entry.
-            entry->counts[threadType][threadState]++;
+            entry->counts[threadType][threadState][methodState]++;
             return;
         }
 
@@ -160,14 +168,15 @@
             // Add a new entry.
             if (set->size < set->maxSize) {
                 entry->method = method;
-                entry->counts[threadType][threadState] = 1;
+                entry->counts[threadType][threadState][methodState] = 1;
                 set->collisions += (i != start);
                 set->size++;
             } else {
                 if (set->capacity < MAX_CAPACITY) {
                     // The set is 3/4 full. Expand it, and then add the entry.
                     expand(set);
-                    countMethod(set, method, threadType, threadState);
+                    countMethod(set, method, threadType, threadState,
+                            methodState);
                 } else {
                     // Don't add any more entries.
                     // TODO: Should we replace the LRU entry?
@@ -193,10 +202,10 @@
         ? EVENT_THREAD : OTHER_THREAD;
 
     ThreadState threadState;
-    switch (thread->status) {
+    switch (dvmGetNativeThreadStatus(thread)) {
         case THREAD_RUNNING: threadState = RUNNING_THREAD; break;
-        case THREAD_NATIVE: threadState = NATIVE_THREAD; break;
-        default: threadState = SUSPENDED_THREAD;
+        case THREAD_NATIVE: dvmAbort();
+        default: threadState = SUSPENDED_THREAD; // includes PAGING
     }
 
     /*
@@ -213,13 +222,15 @@
         return;
     }
 
+    MethodState methodState = LEAF_METHOD;
     while (true) {
         StackSaveArea* saveArea = SAVEAREA_FROM_FP(currentFrame);
 
         const Method* method = saveArea->method;
         // Count the method now. We'll validate later that it's a real Method*.
         if (method != NULL) {
-            countMethod(set, method, threadType, threadState);
+            countMethod(set, method, threadType, threadState, methodState);
+            methodState = CALLING_METHOD;
         }
 
         void* callerFrame = saveArea->prevFrame;
@@ -398,7 +409,8 @@
 
         size += 2; // method name size
         size += wrapper->methodNameLength;
-        size += THREAD_TYPE_SIZE * THREAD_STATE_SIZE * 2; // sample counts
+        // sample counts
+        size += THREAD_TYPE_SIZE * THREAD_STATE_SIZE * METHOD_STATE_SIZE * 2;
         wrapper = wrapper->next;
     } while (wrapper != NULL);
 
@@ -451,11 +463,15 @@
                 wrapper->methodNameLength);
 
         // Sample counts.
-        u2 (*counts)[THREAD_STATE_SIZE] = wrapper->methodCount->counts;
-        int type, state;
+        u2 (*counts)[THREAD_STATE_SIZE][METHOD_STATE_SIZE]
+                = wrapper->methodCount->counts;
+        int type, threadState, methodState;
         for (type = 0; type < THREAD_TYPE_SIZE; type++)
-            for (state = 0; state < THREAD_STATE_SIZE; state++)
-                writeShort(offset, counts[type][state]);
+            for (threadState = 0; threadState < THREAD_STATE_SIZE;
+                    threadState++)
+                for (methodState = 0; methodState < METHOD_STATE_SIZE;
+                        methodState++)
+                    writeShort(offset, counts[type][threadState][methodState]);
 
         methodCount++;
         wrapper = wrapper->next;
@@ -489,13 +505,16 @@
      *  MethodEntry:
      *    method name length (2 bytes)
      *    UTF-8 method name
-     *    Counts (for event thread)
-     *    Counts (for other threads)
+     *    CountsByThreadState (for event thread)
+     *    CountsByThreadState (for other threads)
      *
-     *  Counts:
-     *    running count (2 bytes)
-     *    native count (2 bytes)
-     *    suspended count (2 bytes)
+     *  CountsByThreadState:
+     *    CountsByMethodState (for running threads)
+     *    CountsByMethodState (for suspended threads)
+     *
+     *  CountsByMethodState:
+     *    as calling method (2 bytes)
+     *    as leaf method (2 bytes)
      */
 
     SampleSet* set = (SampleSet*) args[0];
@@ -591,6 +610,17 @@
 }
 
 /**
+ * Frees native memory.
+ */
+static void Dalvik_dalvik_system_SamplingProfiler_free(const u4* args,
+        JValue* pResult) {
+    SampleSet* set = (SampleSet*) args[0];
+    free(set->entries);
+    free(set);
+    RETURN_VOID();
+}
+
+/**
  * Identifies the event thread.
  */
 static void Dalvik_dalvik_system_SamplingProfiler_setEventThread(const u4* args,
@@ -608,8 +638,9 @@
     { "size", "(I)I", Dalvik_dalvik_system_SamplingProfiler_size },
     { "sample", "(I)I", Dalvik_dalvik_system_SamplingProfiler_sample },
     { "snapshot", "(I)[B", Dalvik_dalvik_system_SamplingProfiler_snapshot },
+    { "free", "(I)V", Dalvik_dalvik_system_SamplingProfiler_free },
     { "allocate", "()I", Dalvik_dalvik_system_SamplingProfiler_allocate },
     { "setEventThread", "(ILjava/lang/Thread;)V",
             Dalvik_dalvik_system_SamplingProfiler_setEventThread },
     { NULL, NULL, NULL },
-};
+};
\ No newline at end of file