Merge "Add support for sample profiling in traceview."
diff --git a/vm/Profile.cpp b/vm/Profile.cpp
index dfb50b3..8492cd5 100644
--- a/vm/Profile.cpp
+++ b/vm/Profile.cpp
@@ -118,11 +118,12 @@
  * We use this clock when we can because it enables us to track the time that
  * a thread spends running and not blocked.
  */
-static inline u8 getThreadCpuTimeInUsec()
+static inline u8 getThreadCpuTimeInUsec(Thread* thread)
 {
+    clockid_t cid;
     struct timespec tm;
-
-    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
+    pthread_getcpuclockid(thread->handle, &cid);
+    clock_gettime(cid, &tm);
     if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
         ALOGE("bad nsec: %ld", tm.tv_nsec);
         dvmAbort();
@@ -137,7 +138,7 @@
 static inline u8 getStopwatchClock()
 {
 #if defined(HAVE_POSIX_CLOCKS)
-    return getThreadCpuTimeInUsec();
+    return getThreadCpuTimeInUsec(dvmThreadSelf());
 #else
     return getWallTimeInUsec();
 #endif
@@ -171,6 +172,131 @@
 }
 
 /*
+ * Gets a thread's stack trace as an array of method pointers of length pCount.
+ * The returned array must be freed by the caller.
+ */
+static const Method** getStackTrace(Thread* thread, size_t* pCount)
+{
+    void* fp = thread->interpSave.curFrame;
+    assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));
+
+    /* Compute the stack depth. */
+    size_t stackDepth = 0;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+
+        if (!dvmIsBreakFrame((u4*) fp))
+            stackDepth++;
+
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    *pCount = stackDepth;
+
+    /*
+     * Allocate memory for stack trace. This must be freed later, either by
+     * freeThreadStackTraceSamples when tracing stops or by freeThread.
+     */
+    const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
+                                                        stackDepth);
+    if (stackTrace == NULL)
+        return NULL;
+
+    /* Walk the stack a second time, filling in the stack trace. */
+    const Method** ptr = stackTrace;
+    fp = thread->interpSave.curFrame;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+        const Method* method = saveArea->method;
+
+        if (!dvmIsBreakFrame((u4*) fp)) {
+            *ptr++ = method;
+            stackDepth--;
+        }
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    assert(stackDepth == 0);
+
+    return stackTrace;
+}
+/*
+ * Get a sample of the stack trace for a thread.
+ */
+static void getSample(Thread* thread)
+{
+    /* Get old and new stack trace for thread. */
+    size_t newLength = 0;
+    const Method** newStackTrace = getStackTrace(thread, &newLength);
+    size_t oldLength = thread->stackTraceSampleLength;
+    const Method** oldStackTrace = thread->stackTraceSample;
+
+    /* Read time clocks to use for all events in this trace. */
+    u4 cpuClockDiff = 0;
+    u4 wallClockDiff = 0;
+    dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
+    if (oldStackTrace == NULL) {
+        /*
+         * If there's no previous stack trace sample, log an entry event for
+         * every method in the trace.
+         */
+        for (int i = newLength - 1; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    } else {
+        /*
+         * If there's a previous stack trace, diff the traces and emit entry
+         * and exit events accordingly.
+         */
+        int diffIndexOld = oldLength - 1;
+        int diffIndexNew = newLength - 1;
+        /* Iterate bottom-up until there's a difference between traces. */
+        while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
+               oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
+            diffIndexOld--;
+            diffIndexNew--;
+        }
+        /* Iterate top-down over old trace until diff, emitting exit events. */
+        for (int i = 0; i <= diffIndexOld; ++i) {
+            dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
+                              cpuClockDiff, wallClockDiff);
+        }
+        /* Iterate bottom-up over new trace from diff, emitting entry events. */
+        for (int i = diffIndexNew; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    }
+
+    /* Free the old stack trace and update the thread's stack trace sample. */
+    free(oldStackTrace);
+    thread->stackTraceSample = newStackTrace;
+    thread->stackTraceSampleLength = newLength;
+}
+
+/*
+ * Entry point for sampling thread.
+ */
+static void* runSamplingThread(void* arg)
+{
+    while (gDvm.methodTrace.traceEnabled) {
+        dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);
+
+        dvmLockThreadList(dvmThreadSelf());
+        for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+            getSample(thread);
+        }
+        dvmUnlockThreadList();
+
+        dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);
+
+        usleep(gDvm.methodTrace.samplingIntervalUs);
+    }
+    return NULL;
+}
+
+/*
  * Boot-time init.
  */
 bool dvmProfilingStartup()
@@ -282,6 +408,21 @@
 }
 
 /*
+ * Free and reset the "stackTraceSample" field in all threads.
+ */
+static void freeThreadStackTraceSamples()
+{
+    Thread* thread;
+
+    dvmLockThreadList(NULL);
+    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+        free(thread->stackTraceSample);
+        thread->stackTraceSample = NULL;
+    }
+    dvmUnlockThreadList();
+}
+
+/*
  * Dump the thread list to the specified file.
  */
 static void dumpThreadList(FILE* fp) {
@@ -428,6 +569,10 @@
         state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
     }
 
+    /* TODO: Control sampling through gui. */
+    state->samplingEnabled = false;
+    state->samplingIntervalUs = 1000;
+
     /*
      * Output the header.
      */
@@ -452,7 +597,17 @@
      * following to take a Thread* argument, and set the appropriate
      * interpBreak flags only on the target thread.
      */
-    updateActiveProfilers(kSubModeMethodTrace, true);
+    if (state->samplingEnabled) {
+        updateActiveProfilers(kSubModeSampleTrace, true);
+        /* Start the sampling thread. */
+        if (!dvmCreateInternalThread(&state->samplingThreadHandle,
+                "Sampling Thread", &runSamplingThread, NULL)) {
+            dvmThrowInternalError("failed to create sampling thread");
+            goto fail;
+        }
+    } else {
+        updateActiveProfilers(kSubModeMethodTrace, true);
+    }
 
     dvmUnlockMutex(&state->startStopLock);
     return;
@@ -500,7 +655,7 @@
 {
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        getThreadCpuTimeInUsec();
+        getThreadCpuTimeInUsec(dvmThreadSelf());
     }
 #endif
     if (useWallClock()) {
@@ -565,7 +720,11 @@
         dvmUnlockMutex(&state->startStopLock);
         return;
     } else {
-        updateActiveProfilers(kSubModeMethodTrace, false);
+        if (state->samplingEnabled) {
+            updateActiveProfilers(kSubModeSampleTrace, false);
+        } else {
+            updateActiveProfilers(kSubModeMethodTrace, false);
+        }
     }
 
     /* compute elapsed time */
@@ -719,9 +878,42 @@
     fclose(state->traceFile);
     state->traceFile = NULL;
 
+    /* free and clear sampling traces held by all threads */
+    if (state->samplingEnabled) {
+        freeThreadStackTraceSamples();
+    }
+
     /* wake any threads that were waiting for profiling to complete */
     dvmBroadcastCond(&state->threadExitCond);
     dvmUnlockMutex(&state->startStopLock);
+
+    /* make sure the sampling thread has stopped */
+    if (state->samplingEnabled &&
+        pthread_join(state->samplingThreadHandle, NULL) != 0) {
+        ALOGW("Sampling thread join failed");
+    }
+}
+
+/*
+ * Read clocks and generate time diffs for method trace events.
+ */
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff)
+{
+#if defined(HAVE_POSIX_CLOCKS)
+    if (useThreadCpuClock()) {
+        if (!self->cpuClockBaseSet) {
+            /* Initialize per-thread CPU clock base time on first use. */
+            self->cpuClockBase = getThreadCpuTimeInUsec(self);
+            self->cpuClockBaseSet = true;
+        } else {
+            *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
+        }
+    }
+#endif
+    if (useWallClock()) {
+        *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
+    }
 }
 
 /*
@@ -730,7 +922,8 @@
  * Multiple threads may be banging on this all at once.  We use atomic ops
  * rather than mutexes for speed.
  */
-void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
+void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff)
 {
     MethodTraceState* state = &gDvm.methodTrace;
     u4 methodVal;
@@ -739,21 +932,6 @@
 
     assert(method != NULL);
 
-#if defined(HAVE_POSIX_CLOCKS)
-    /*
-     * We can only access the per-thread CPU clock from within the
-     * thread, so we have to initialize the base time on the first use.
-     * (Looks like pthread_getcpuclockid(thread, &id) will do what we
-     * want, but it doesn't appear to be defined on the device.)
-     */
-    if (!self->cpuClockBaseSet) {
-        self->cpuClockBase = getThreadCpuTimeInUsec();
-        self->cpuClockBaseSet = true;
-        //ALOGI("thread base id=%d 0x%llx",
-        //    self->threadId, self->cpuClockBase);
-    }
-#endif
-
     /*
      * Advance "curOffset" atomically.
      */
@@ -784,7 +962,6 @@
 
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        u4 cpuClockDiff = (u4) (getThreadCpuTimeInUsec() - self->cpuClockBase);
         *ptr++ = (u1) cpuClockDiff;
         *ptr++ = (u1) (cpuClockDiff >> 8);
         *ptr++ = (u1) (cpuClockDiff >> 16);
@@ -793,7 +970,6 @@
 #endif
 
     if (useWallClock()) {
-        u4 wallClockDiff = (u4) (getWallTimeInUsec() - state->startWhen);
         *ptr++ = (u1) wallClockDiff;
         *ptr++ = (u1) (wallClockDiff >> 8);
         *ptr++ = (u1) (wallClockDiff >> 16);
@@ -809,7 +985,11 @@
 void dvmFastMethodTraceEnter(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
@@ -821,8 +1001,11 @@
 void dvmFastMethodTraceExit(Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
         dvmMethodTraceAdd(self, self->interpSave.method,
-                          METHOD_TRACE_EXIT);
+                          METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
     }
 }
 
@@ -834,7 +1017,11 @@
 void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
diff --git a/vm/Profile.h b/vm/Profile.h
index 6c9e1c7..0ec8705 100644
--- a/vm/Profile.h
+++ b/vm/Profile.h
@@ -52,6 +52,10 @@
 
     int     traceVersion;
     size_t  recordSize;
+
+    bool    samplingEnabled;
+    int     samplingIntervalUs;
+    pthread_t       samplingThreadHandle;
 };
 
 /*
@@ -112,27 +116,45 @@
  */
 #define TRACE_METHOD_ENTER(_self, _method)                                  \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER);          \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER,           \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_ENTER);              \
     } while(0);
 #define TRACE_METHOD_EXIT(_self, _method)                                   \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT);           \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT,            \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_EXIT);               \
     } while(0);
 #define TRACE_METHOD_UNROLL(_self, _method)                                 \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL);         \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL,          \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_UNROLL);             \
     } while(0);
 
-void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action);
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff);
+void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff);
 void dvmEmitEmulatorTrace(const Method* method, int action);
 
 void dvmMethodTraceGCBegin(void);
diff --git a/vm/Thread.cpp b/vm/Thread.cpp
index 205167e..1ebfca7 100644
--- a/vm/Thread.cpp
+++ b/vm/Thread.cpp
@@ -997,6 +997,7 @@
 #if defined(WITH_SELF_VERIFICATION)
     dvmSelfVerificationShadowSpaceFree(thread);
 #endif
+    free(thread->stackTraceSample);
     free(thread);
 }
 
diff --git a/vm/Thread.h b/vm/Thread.h
index 8deef6e..19bd49c 100644
--- a/vm/Thread.h
+++ b/vm/Thread.h
@@ -287,6 +287,10 @@
     bool        cpuClockBaseSet;
     u8          cpuClockBase;
 
+    /* previous stack trace sample and length (used by sampling profiler) */
+    const Method** stackTraceSample;
+    size_t stackTraceSampleLength;
+
     /* memory allocation profiling state */
     AllocProfState allocProf;
 
@@ -358,6 +362,7 @@
     SUSPEND_FOR_DEX_OPT,
     SUSPEND_FOR_VERIFY,
     SUSPEND_FOR_HPROF,
+    SUSPEND_FOR_SAMPLING,
 #if defined(WITH_JIT)
     SUSPEND_FOR_TBL_RESIZE,  // jit-table resize
     SUSPEND_FOR_IC_PATCH,    // polymorphic callsite inline-cache patch
diff --git a/vm/interp/Interp.cpp b/vm/interp/Interp.cpp
index fa77523..2b13bd8 100644
--- a/vm/interp/Interp.cpp
+++ b/vm/interp/Interp.cpp
@@ -1661,7 +1661,11 @@
         dvmEnableSubMode(thread, kSubModeInstCounting);
     }
     if (dvmIsMethodTraceActive()) {
-        dvmEnableSubMode(thread, kSubModeMethodTrace);
+        if (gDvm.methodTrace.samplingEnabled) {
+            dvmEnableSubMode(thread, kSubModeSampleTrace);
+        } else {
+            dvmEnableSubMode(thread, kSubModeMethodTrace);
+        }
     }
     if (gDvm.emulatorTraceEnableCount > 0) {
         dvmEnableSubMode(thread, kSubModeEmulatorTrace);
diff --git a/vm/interp/InterpState.h b/vm/interp/InterpState.h
index cc0a13f..6ef4472 100644
--- a/vm/interp/InterpState.h
+++ b/vm/interp/InterpState.h
@@ -63,6 +63,7 @@
     kSubModeCallbackPending   = 0x0020,
     kSubModeCountedStep       = 0x0040,
     kSubModeCheckAlways       = 0x0080,
+    kSubModeSampleTrace       = 0x0100,
     kSubModeJitTraceBuild     = 0x4000,
     kSubModeJitSV             = 0x8000,
     kSubModeDebugProfile   = (kSubModeMethodTrace |