| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * Android's method call profiling goodies. |
| */ |
| #include "Dalvik.h" |
| #include <interp/InterpDefs.h> |
| |
| #include <stdlib.h> |
| #include <stddef.h> |
| #include <string.h> |
| #include <sys/time.h> |
| #include <time.h> |
| #include <sys/mman.h> |
| #include <sched.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| |
| #include <cutils/open_memstream.h> |
| |
| #ifdef HAVE_ANDROID_OS |
| # define UPDATE_MAGIC_PAGE 1 |
| #endif |
| |
| /* |
| * File format: |
| * header |
| * record 0 |
| * record 1 |
| * ... |
| * |
| * Header format: |
| * u4 magic ('SLOW') |
| * u2 version |
| * u2 offset to data |
| * u8 start date/time in usec |
| * u2 record size in bytes (version >= 2 only) |
| * ... padding to 32 bytes |
| * |
| * Record format v1: |
| * u1 thread ID |
| * u4 method ID | method action |
| * u4 time delta since start, in usec |
| * |
| * Record format v2: |
| * u2 thread ID |
| * u4 method ID | method action |
| * u4 time delta since start, in usec |
| * |
| * Record format v3: |
| * u2 thread ID |
| * u4 method ID | method action |
| * u4 time delta since start, in usec |
| * u4 wall time since start, in usec (when clock == "dual" only) |
| * |
| * 32 bits of microseconds is 70 minutes. |
| * |
| * All values are stored in little-endian order. |
| */ |
| #define TRACE_REC_SIZE_SINGLE_CLOCK 10 // using v2 |
| #define TRACE_REC_SIZE_DUAL_CLOCK 14 // using v3 with two timestamps |
| #define TRACE_MAGIC 0x574f4c53 |
| #define TRACE_HEADER_LEN 32 |
| |
| #define FILL_PATTERN 0xeeeeeeee |
| |
| |
| /* |
| * Returns true if the thread CPU clock should be used. |
| */ |
| static inline bool useThreadCpuClock() { |
| #if defined(HAVE_POSIX_CLOCKS) |
| return gDvm.profilerClockSource != kProfilerClockSourceWall; |
| #else |
| return false; |
| #endif |
| } |
| |
| /* |
| * Returns true if the wall clock should be used. |
| */ |
| static inline bool useWallClock() { |
| #if defined(HAVE_POSIX_CLOCKS) |
| return gDvm.profilerClockSource != kProfilerClockSourceThreadCpu; |
| #else |
| return true; |
| #endif |
| } |
| |
| /* |
| * Get the wall-clock date/time, in usec. |
| */ |
| static inline u8 getWallTimeInUsec() |
| { |
| struct timeval tv; |
| |
| gettimeofday(&tv, NULL); |
| return tv.tv_sec * 1000000LL + tv.tv_usec; |
| } |
| |
| #if defined(HAVE_POSIX_CLOCKS) |
| /* |
| * Get the thread-cpu time, in usec. |
| * We use this clock when we can because it enables us to track the time that |
| * a thread spends running and not blocked. |
| */ |
| static inline u8 getThreadCpuTimeInUsec(Thread* thread) |
| { |
| clockid_t cid; |
| struct timespec tm; |
| pthread_getcpuclockid(thread->handle, &cid); |
| clock_gettime(cid, &tm); |
| if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) { |
| ALOGE("bad nsec: %ld", tm.tv_nsec); |
| dvmAbort(); |
| } |
| return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000; |
| } |
| #endif |
| |
| /* |
| * Get the clock used for stopwatch-like timing measurements on a single thread. |
| */ |
| static inline u8 getStopwatchClock() |
| { |
| #if defined(HAVE_POSIX_CLOCKS) |
| return getThreadCpuTimeInUsec(dvmThreadSelf()); |
| #else |
| return getWallTimeInUsec(); |
| #endif |
| } |
| |
| /* |
| * Write little-endian data. |
| */ |
| static inline void storeShortLE(u1* buf, u2 val) |
| { |
| *buf++ = (u1) val; |
| *buf++ = (u1) (val >> 8); |
| } |
| static inline void storeIntLE(u1* buf, u4 val) |
| { |
| *buf++ = (u1) val; |
| *buf++ = (u1) (val >> 8); |
| *buf++ = (u1) (val >> 16); |
| *buf++ = (u1) (val >> 24); |
| } |
| static inline void storeLongLE(u1* buf, u8 val) |
| { |
| *buf++ = (u1) val; |
| *buf++ = (u1) (val >> 8); |
| *buf++ = (u1) (val >> 16); |
| *buf++ = (u1) (val >> 24); |
| *buf++ = (u1) (val >> 32); |
| *buf++ = (u1) (val >> 40); |
| *buf++ = (u1) (val >> 48); |
| *buf++ = (u1) (val >> 56); |
| } |
| |
| /* |
| * Gets a thread's stack trace as an array of method pointers of length pCount. |
| * The returned array must be freed by the caller. |
| */ |
| static const Method** getStackTrace(Thread* thread, size_t* pCount) |
| { |
| void* fp = thread->interpSave.curFrame; |
| assert(thread == dvmThreadSelf() || dvmIsSuspended(thread)); |
| |
| /* Compute the stack depth. */ |
| size_t stackDepth = 0; |
| while (fp != NULL) { |
| const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp); |
| |
| if (!dvmIsBreakFrame((u4*) fp)) |
| stackDepth++; |
| |
| assert(fp != saveArea->prevFrame); |
| fp = saveArea->prevFrame; |
| } |
| *pCount = stackDepth; |
| |
| /* |
| * Allocate memory for stack trace. This must be freed later, either by |
| * freeThreadStackTraceSamples when tracing stops or by freeThread. |
| */ |
| const Method** stackTrace = (const Method**) malloc(sizeof(Method*) * |
| stackDepth); |
| if (stackTrace == NULL) |
| return NULL; |
| |
| /* Walk the stack a second time, filling in the stack trace. */ |
| const Method** ptr = stackTrace; |
| fp = thread->interpSave.curFrame; |
| while (fp != NULL) { |
| const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp); |
| const Method* method = saveArea->method; |
| |
| if (!dvmIsBreakFrame((u4*) fp)) { |
| *ptr++ = method; |
| stackDepth--; |
| } |
| assert(fp != saveArea->prevFrame); |
| fp = saveArea->prevFrame; |
| } |
| assert(stackDepth == 0); |
| |
| return stackTrace; |
| } |
| /* |
| * Get a sample of the stack trace for a thread. |
| */ |
| static void getSample(Thread* thread) |
| { |
| /* Get old and new stack trace for thread. */ |
| size_t newLength = 0; |
| const Method** newStackTrace = getStackTrace(thread, &newLength); |
| size_t oldLength = thread->stackTraceSampleLength; |
| const Method** oldStackTrace = thread->stackTraceSample; |
| |
| /* Read time clocks to use for all events in this trace. */ |
| u4 cpuClockDiff = 0; |
| u4 wallClockDiff = 0; |
| dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff); |
| if (oldStackTrace == NULL) { |
| /* |
| * If there's no previous stack trace sample, log an entry event for |
| * every method in the trace. |
| */ |
| for (int i = newLength - 1; i >= 0; --i) { |
| dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER, |
| cpuClockDiff, wallClockDiff); |
| } |
| } else { |
| /* |
| * If there's a previous stack trace, diff the traces and emit entry |
| * and exit events accordingly. |
| */ |
| int diffIndexOld = oldLength - 1; |
| int diffIndexNew = newLength - 1; |
| /* Iterate bottom-up until there's a difference between traces. */ |
| while (diffIndexOld >= 0 && diffIndexNew >= 0 && |
| oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) { |
| diffIndexOld--; |
| diffIndexNew--; |
| } |
| /* Iterate top-down over old trace until diff, emitting exit events. */ |
| for (int i = 0; i <= diffIndexOld; ++i) { |
| dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT, |
| cpuClockDiff, wallClockDiff); |
| } |
| /* Iterate bottom-up over new trace from diff, emitting entry events. */ |
| for (int i = diffIndexNew; i >= 0; --i) { |
| dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER, |
| cpuClockDiff, wallClockDiff); |
| } |
| } |
| |
| /* Free the old stack trace and update the thread's stack trace sample. */ |
| free(oldStackTrace); |
| thread->stackTraceSample = newStackTrace; |
| thread->stackTraceSampleLength = newLength; |
| } |
| |
| /* |
| * Entry point for sampling thread. The sampling interval in microseconds is |
| * passed in as an argument. |
| */ |
| static void* runSamplingThread(void* arg) |
| { |
| int intervalUs = (int) arg; |
| while (gDvm.methodTrace.traceEnabled) { |
| dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING); |
| |
| dvmLockThreadList(dvmThreadSelf()); |
| for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) { |
| getSample(thread); |
| } |
| dvmUnlockThreadList(); |
| |
| dvmResumeAllThreads(SUSPEND_FOR_SAMPLING); |
| |
| usleep(intervalUs); |
| } |
| return NULL; |
| } |
| |
| /* |
| * Boot-time init. |
| */ |
| bool dvmProfilingStartup() |
| { |
| /* |
| * Initialize "dmtrace" method profiling. |
| */ |
| memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace)); |
| dvmInitMutex(&gDvm.methodTrace.startStopLock); |
| pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL); |
| |
| assert(!dvmCheckException(dvmThreadSelf())); |
| |
| /* |
| * Allocate storage for instruction counters. |
| */ |
| gDvm.executedInstrCounts = (int*) calloc(kNumPackedOpcodes, sizeof(int)); |
| if (gDvm.executedInstrCounts == NULL) |
| return false; |
| |
| #ifdef UPDATE_MAGIC_PAGE |
| /* |
| * If we're running on the emulator, there's a magic page into which |
| * we can put interpreted method information. This allows interpreted |
| * methods to show up in the emulator's code traces. |
| * |
| * We could key this off of the "ro.kernel.qemu" property, but there's |
| * no real harm in doing this on a real device. |
| */ |
| int fd = open("/dev/qemu_trace", O_RDWR); |
| if (fd < 0) { |
| ALOGV("Unable to open /dev/qemu_trace"); |
| } else { |
| gDvm.emulatorTracePage = mmap(0, SYSTEM_PAGE_SIZE, PROT_READ|PROT_WRITE, |
| MAP_SHARED, fd, 0); |
| close(fd); |
| if (gDvm.emulatorTracePage == MAP_FAILED) { |
| ALOGE("Unable to mmap /dev/qemu_trace"); |
| gDvm.emulatorTracePage = NULL; |
| } else { |
| *(u4*) gDvm.emulatorTracePage = 0; |
| } |
| } |
| #else |
| assert(gDvm.emulatorTracePage == NULL); |
| #endif |
| |
| return true; |
| } |
| |
| /* |
| * Free up profiling resources. |
| */ |
| void dvmProfilingShutdown() |
| { |
| #ifdef UPDATE_MAGIC_PAGE |
| if (gDvm.emulatorTracePage != NULL) |
| munmap(gDvm.emulatorTracePage, SYSTEM_PAGE_SIZE); |
| #endif |
| free(gDvm.executedInstrCounts); |
| } |
| |
| /* |
| * Update the set of active profilers |
| */ |
| static void updateActiveProfilers(ExecutionSubModes newMode, bool enable) |
| { |
| int oldValue, newValue; |
| |
| // Update global count |
| do { |
| oldValue = gDvm.activeProfilers; |
| newValue = oldValue + (enable ? 1 : -1); |
| if (newValue < 0) { |
| ALOGE("Can't have %d active profilers", newValue); |
| dvmAbort(); |
| } |
| } while (android_atomic_release_cas(oldValue, newValue, |
| &gDvm.activeProfilers) != 0); |
| |
| // Tell the threads |
| if (enable) { |
| dvmEnableAllSubMode(newMode); |
| } else { |
| dvmDisableAllSubMode(newMode); |
| } |
| |
| #if defined(WITH_JIT) |
| dvmCompilerUpdateGlobalState(); |
| #endif |
| |
| ALOGD("+++ active profiler count now %d", newValue); |
| } |
| |
| |
| /* |
| * Reset the "cpuClockBase" field in all threads. |
| */ |
| static void resetCpuClockBase() |
| { |
| Thread* thread; |
| |
| dvmLockThreadList(NULL); |
| for (thread = gDvm.threadList; thread != NULL; thread = thread->next) { |
| thread->cpuClockBaseSet = false; |
| thread->cpuClockBase = 0; |
| } |
| dvmUnlockThreadList(); |
| } |
| |
| /* |
| * Free and reset the "stackTraceSample" field in all threads. |
| */ |
| static void freeThreadStackTraceSamples() |
| { |
| Thread* thread; |
| |
| dvmLockThreadList(NULL); |
| for (thread = gDvm.threadList; thread != NULL; thread = thread->next) { |
| free(thread->stackTraceSample); |
| thread->stackTraceSample = NULL; |
| } |
| dvmUnlockThreadList(); |
| } |
| |
| /* |
| * Dump the thread list to the specified file. |
| */ |
| static void dumpThreadList(FILE* fp) { |
| dvmLockThreadList(NULL); |
| for (Thread* thread = gDvm.threadList; thread != NULL; thread = thread->next) { |
| std::string threadName(dvmGetThreadName(thread)); |
| fprintf(fp, "%d\t%s\n", thread->threadId, threadName.c_str()); |
| } |
| dvmUnlockThreadList(); |
| } |
| |
| /* |
| * This is a dvmHashForeach callback. |
| */ |
| static int dumpMarkedMethods(void* vclazz, void* vfp) |
| { |
| DexStringCache stringCache; |
| ClassObject* clazz = (ClassObject*) vclazz; |
| FILE* fp = (FILE*) vfp; |
| Method* meth; |
| char* name; |
| int i; |
| |
| dexStringCacheInit(&stringCache); |
| |
| for (i = 0; i < clazz->virtualMethodCount; i++) { |
| meth = &clazz->virtualMethods[i]; |
| if (meth->inProfile) { |
| name = dvmDescriptorToName(meth->clazz->descriptor); |
| fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth, |
| name, meth->name, |
| dexProtoGetMethodDescriptor(&meth->prototype, &stringCache), |
| dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0)); |
| meth->inProfile = false; |
| free(name); |
| } |
| } |
| |
| for (i = 0; i < clazz->directMethodCount; i++) { |
| meth = &clazz->directMethods[i]; |
| if (meth->inProfile) { |
| name = dvmDescriptorToName(meth->clazz->descriptor); |
| fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth, |
| name, meth->name, |
| dexProtoGetMethodDescriptor(&meth->prototype, &stringCache), |
| dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0)); |
| meth->inProfile = false; |
| free(name); |
| } |
| } |
| |
| dexStringCacheRelease(&stringCache); |
| |
| return 0; |
| } |
| |
| /* |
| * Dump the list of "marked" methods to the specified file. |
| */ |
| static void dumpMethodList(FILE* fp) |
| { |
| dvmHashTableLock(gDvm.loadedClasses); |
| dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp); |
| dvmHashTableUnlock(gDvm.loadedClasses); |
| } |
| |
| /* |
| * Start method tracing. Method tracing is global to the VM (i.e. we |
| * trace all threads). |
| * |
| * This opens the output file (if an already open fd has not been supplied, |
| * and we're not going direct to DDMS) and allocates the data buffer. This |
| * takes ownership of the file descriptor, closing it on completion. |
| * |
| * On failure, we throw an exception and return. |
| */ |
| void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize, |
| int flags, bool directToDdms, bool samplingEnabled, int intervalUs) |
| { |
| MethodTraceState* state = &gDvm.methodTrace; |
| |
| assert(bufferSize > 0); |
| |
| dvmLockMutex(&state->startStopLock); |
| while (state->traceEnabled != 0) { |
| ALOGI("TRACE start requested, but already in progress; stopping"); |
| dvmUnlockMutex(&state->startStopLock); |
| dvmMethodTraceStop(); |
| dvmLockMutex(&state->startStopLock); |
| } |
| ALOGI("TRACE STARTED: '%s' %dKB", traceFileName, bufferSize / 1024); |
| |
| /* |
| * Allocate storage and open files. |
| * |
| * We don't need to initialize the buffer, but doing so might remove |
| * some fault overhead if the pages aren't mapped until touched. |
| */ |
| state->buf = (u1*) malloc(bufferSize); |
| if (state->buf == NULL) { |
| dvmThrowInternalError("buffer alloc failed"); |
| goto fail; |
| } |
| if (!directToDdms) { |
| if (traceFd < 0) { |
| state->traceFile = fopen(traceFileName, "w"); |
| } else { |
| state->traceFile = fdopen(traceFd, "w"); |
| } |
| if (state->traceFile == NULL) { |
| int err = errno; |
| ALOGE("Unable to open trace file '%s': %s", |
| traceFileName, strerror(err)); |
| dvmThrowExceptionFmt(gDvm.exRuntimeException, |
| "Unable to open trace file '%s': %s", |
| traceFileName, strerror(err)); |
| goto fail; |
| } |
| } |
| traceFd = -1; |
| memset(state->buf, (char)FILL_PATTERN, bufferSize); |
| |
| state->directToDdms = directToDdms; |
| state->bufferSize = bufferSize; |
| state->overflow = false; |
| |
| /* |
| * Enable alloc counts if we've been requested to do so. |
| */ |
| state->flags = flags; |
| if ((flags & TRACE_ALLOC_COUNTS) != 0) |
| dvmStartAllocCounting(); |
| |
| /* reset our notion of the start time for all CPU threads */ |
| resetCpuClockBase(); |
| |
| state->startWhen = getWallTimeInUsec(); |
| |
| if (useThreadCpuClock() && useWallClock()) { |
| state->traceVersion = 3; |
| state->recordSize = TRACE_REC_SIZE_DUAL_CLOCK; |
| } else { |
| state->traceVersion = 2; |
| state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK; |
| } |
| |
| state->samplingEnabled = samplingEnabled; |
| |
| /* |
| * Output the header. |
| */ |
| memset(state->buf, 0, TRACE_HEADER_LEN); |
| storeIntLE(state->buf + 0, TRACE_MAGIC); |
| storeShortLE(state->buf + 4, state->traceVersion); |
| storeShortLE(state->buf + 6, TRACE_HEADER_LEN); |
| storeLongLE(state->buf + 8, state->startWhen); |
| if (state->traceVersion >= 3) { |
| storeShortLE(state->buf + 16, state->recordSize); |
| } |
| state->curOffset = TRACE_HEADER_LEN; |
| |
| /* |
| * Set the "enabled" flag. Once we do this, threads will wait to be |
| * signaled before exiting, so we have to make sure we wake them up. |
| */ |
| android_atomic_release_store(true, &state->traceEnabled); |
| |
| /* |
| * ENHANCEMENT: To trace just a single thread, modify the |
| * following to take a Thread* argument, and set the appropriate |
| * interpBreak flags only on the target thread. |
| */ |
| if (samplingEnabled) { |
| updateActiveProfilers(kSubModeSampleTrace, true); |
| /* Start the sampling thread. */ |
| if (!dvmCreateInternalThread(&state->samplingThreadHandle, |
| "Sampling Thread", &runSamplingThread, (void*) intervalUs)) { |
| dvmThrowInternalError("failed to create sampling thread"); |
| goto fail; |
| } |
| } else { |
| updateActiveProfilers(kSubModeMethodTrace, true); |
| } |
| |
| dvmUnlockMutex(&state->startStopLock); |
| return; |
| |
| fail: |
| if (state->traceFile != NULL) { |
| fclose(state->traceFile); |
| state->traceFile = NULL; |
| } |
| if (state->buf != NULL) { |
| free(state->buf); |
| state->buf = NULL; |
| } |
| if (traceFd >= 0) |
| close(traceFd); |
| dvmUnlockMutex(&state->startStopLock); |
| } |
| |
| /* |
| * Run through the data buffer and pull out the methods that were visited. |
| * Set a mark so that we know which ones to output. |
| */ |
| static void markTouchedMethods(int endOffset) |
| { |
| u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN; |
| u1* end = gDvm.methodTrace.buf + endOffset; |
| size_t recordSize = gDvm.methodTrace.recordSize; |
| unsigned int methodVal; |
| Method* method; |
| |
| while (ptr < end) { |
| methodVal = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16) |
| | (ptr[5] << 24); |
| method = (Method*) METHOD_ID(methodVal); |
| |
| method->inProfile = true; |
| ptr += recordSize; |
| } |
| } |
| |
| /* |
| * Exercises the clocks in the same way they will be during profiling. |
| */ |
| static inline void measureClockOverhead() |
| { |
| #if defined(HAVE_POSIX_CLOCKS) |
| if (useThreadCpuClock()) { |
| getThreadCpuTimeInUsec(dvmThreadSelf()); |
| } |
| #endif |
| if (useWallClock()) { |
| getWallTimeInUsec(); |
| } |
| } |
| |
| /* |
| * Compute the amount of overhead in a clock call, in nsec. |
| * |
| * This value is going to vary depending on what else is going on in the |
| * system. When examined across several runs a pattern should emerge. |
| */ |
| static u4 getClockOverhead() |
| { |
| u8 calStart, calElapsed; |
| int i; |
| |
| calStart = getStopwatchClock(); |
| for (i = 1000 * 4; i > 0; i--) { |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| measureClockOverhead(); |
| } |
| |
| calElapsed = getStopwatchClock() - calStart; |
| return (int) (calElapsed / (8*4)); |
| } |
| |
| /* |
| * Indicates if method tracing is active and what kind of tracing is active. |
| */ |
| TracingMode dvmGetMethodTracingMode() |
| { |
| const MethodTraceState* state = &gDvm.methodTrace; |
| if (!state->traceEnabled) { |
| return TRACING_INACTIVE; |
| } else if (state->samplingEnabled) { |
| return SAMPLE_PROFILING_ACTIVE; |
| } else { |
| return METHOD_TRACING_ACTIVE; |
| } |
| } |
| |
| /* |
| * Stop method tracing. We write the buffer to disk and generate a key |
| * file so we can interpret it. |
| */ |
| void dvmMethodTraceStop() |
| { |
| MethodTraceState* state = &gDvm.methodTrace; |
| bool samplingEnabled = state->samplingEnabled; |
| u8 elapsed; |
| |
| /* |
| * We need this to prevent somebody from starting a new trace while |
| * we're in the process of stopping the old. |
| */ |
| dvmLockMutex(&state->startStopLock); |
| |
| if (!state->traceEnabled) { |
| /* somebody already stopped it, or it was never started */ |
| ALOGD("TRACE stop requested, but not running"); |
| dvmUnlockMutex(&state->startStopLock); |
| return; |
| } else { |
| if (samplingEnabled) { |
| updateActiveProfilers(kSubModeSampleTrace, false); |
| } else { |
| updateActiveProfilers(kSubModeMethodTrace, false); |
| } |
| } |
| |
| /* compute elapsed time */ |
| elapsed = getWallTimeInUsec() - state->startWhen; |
| |
| /* |
| * Globally disable it, and allow other threads to notice. We want |
| * to stall here for at least as long as dvmMethodTraceAdd needs |
| * to finish. There's no real risk though -- it will take a while to |
| * write the data to disk, and we don't clear the buffer pointer until |
| * after that completes. |
| */ |
| state->traceEnabled = false; |
| ANDROID_MEMBAR_FULL(); |
| sched_yield(); |
| usleep(250 * 1000); |
| |
| if ((state->flags & TRACE_ALLOC_COUNTS) != 0) |
| dvmStopAllocCounting(); |
| |
| /* |
| * It's possible under some circumstances for a thread to have advanced |
| * the data pointer but not written the method value. It's possible |
| * (though less likely) for the data pointer to be advanced, or partial |
| * data written, while we're doing work here. |
| * |
| * To avoid seeing partially-written data, we grab state->curOffset here, |
| * and use our local copy from here on. We then scan through what's |
| * already written. If we see the fill pattern in what should be the |
| * method pointer, we cut things off early. (If we don't, we'll fail |
| * when we dereference the pointer.) |
| * |
| * There's a theoretical possibility of interrupting another thread |
| * after it has partially written the method pointer, in which case |
| * we'll likely crash when we dereference it. The possibility of |
| * this actually happening should be at or near zero. Fixing it |
| * completely could be done by writing the thread number last and |
| * using a sentinel value to indicate a partially-written record, |
| * but that requires memory barriers. |
| */ |
| int finalCurOffset = state->curOffset; |
| |
| size_t recordSize = state->recordSize; |
| if (finalCurOffset > TRACE_HEADER_LEN) { |
| u4 fillVal = METHOD_ID(FILL_PATTERN); |
| u1* scanPtr = state->buf + TRACE_HEADER_LEN; |
| |
| while (scanPtr < state->buf + finalCurOffset) { |
| u4 methodVal = scanPtr[2] | (scanPtr[3] << 8) | (scanPtr[4] << 16) |
| | (scanPtr[5] << 24); |
| if (METHOD_ID(methodVal) == fillVal) { |
| u1* scanBase = state->buf + TRACE_HEADER_LEN; |
| ALOGW("Found unfilled record at %d (of %d)", |
| (scanPtr - scanBase) / recordSize, |
| (finalCurOffset - TRACE_HEADER_LEN) / recordSize); |
| finalCurOffset = scanPtr - state->buf; |
| break; |
| } |
| |
| scanPtr += recordSize; |
| } |
| } |
| |
| ALOGI("TRACE STOPPED%s: writing %d records", |
| state->overflow ? " (NOTE: overflowed buffer)" : "", |
| (finalCurOffset - TRACE_HEADER_LEN) / recordSize); |
| if (gDvm.debuggerActive) { |
| ALOGW("WARNING: a debugger is active; method-tracing results " |
| "will be skewed"); |
| } |
| |
| /* |
| * Do a quick calibration test to see how expensive our clock call is. |
| */ |
| u4 clockNsec = getClockOverhead(); |
| |
| markTouchedMethods(finalCurOffset); |
| |
| char* memStreamPtr; |
| size_t memStreamSize; |
| if (state->directToDdms) { |
| assert(state->traceFile == NULL); |
| state->traceFile = open_memstream(&memStreamPtr, &memStreamSize); |
| if (state->traceFile == NULL) { |
| /* not expected */ |
| ALOGE("Unable to open memstream"); |
| dvmAbort(); |
| } |
| } |
| assert(state->traceFile != NULL); |
| |
| fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR); |
| fprintf(state->traceFile, "%d\n", state->traceVersion); |
| fprintf(state->traceFile, "data-file-overflow=%s\n", |
| state->overflow ? "true" : "false"); |
| if (useThreadCpuClock()) { |
| if (useWallClock()) { |
| fprintf(state->traceFile, "clock=dual\n"); |
| } else { |
| fprintf(state->traceFile, "clock=thread-cpu\n"); |
| } |
| } else { |
| fprintf(state->traceFile, "clock=wall\n"); |
| } |
| fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed); |
| fprintf(state->traceFile, "num-method-calls=%d\n", |
| (finalCurOffset - TRACE_HEADER_LEN) / state->recordSize); |
| fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec); |
| fprintf(state->traceFile, "vm=dalvik\n"); |
| if ((state->flags & TRACE_ALLOC_COUNTS) != 0) { |
| fprintf(state->traceFile, "alloc-count=%d\n", |
| gDvm.allocProf.allocCount); |
| fprintf(state->traceFile, "alloc-size=%d\n", |
| gDvm.allocProf.allocSize); |
| fprintf(state->traceFile, "gc-count=%d\n", |
| gDvm.allocProf.gcCount); |
| } |
| fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR); |
| dumpThreadList(state->traceFile); |
| fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR); |
| dumpMethodList(state->traceFile); |
| fprintf(state->traceFile, "%cend\n", TOKEN_CHAR); |
| |
| if (state->directToDdms) { |
| /* |
| * Data is in two places: memStreamPtr and state->buf. Send |
| * the whole thing to DDMS, wrapped in an MPSE packet. |
| */ |
| fflush(state->traceFile); |
| |
| struct iovec iov[2]; |
| iov[0].iov_base = memStreamPtr; |
| iov[0].iov_len = memStreamSize; |
| iov[1].iov_base = state->buf; |
| iov[1].iov_len = finalCurOffset; |
| dvmDbgDdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2); |
| } else { |
| /* append the profiling data */ |
| if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) { |
| int err = errno; |
| ALOGE("trace fwrite(%d) failed: %s", |
| finalCurOffset, strerror(err)); |
| dvmThrowExceptionFmt(gDvm.exRuntimeException, |
| "Trace data write failed: %s", strerror(err)); |
| } |
| } |
| |
| /* done! */ |
| free(state->buf); |
| state->buf = NULL; |
| fclose(state->traceFile); |
| state->traceFile = NULL; |
| |
| /* free and clear sampling traces held by all threads */ |
| if (samplingEnabled) { |
| freeThreadStackTraceSamples(); |
| } |
| |
| /* wake any threads that were waiting for profiling to complete */ |
| dvmBroadcastCond(&state->threadExitCond); |
| dvmUnlockMutex(&state->startStopLock); |
| |
| /* make sure the sampling thread has stopped */ |
| if (samplingEnabled && |
| pthread_join(state->samplingThreadHandle, NULL) != 0) { |
| ALOGW("Sampling thread join failed"); |
| } |
| } |
| |
| /* |
| * Read clocks and generate time diffs for method trace events. |
| */ |
| void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff, |
| u4* wallClockDiff) |
| { |
| #if defined(HAVE_POSIX_CLOCKS) |
| if (useThreadCpuClock()) { |
| if (!self->cpuClockBaseSet) { |
| /* Initialize per-thread CPU clock base time on first use. */ |
| self->cpuClockBase = getThreadCpuTimeInUsec(self); |
| self->cpuClockBaseSet = true; |
| } else { |
| *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase; |
| } |
| } |
| #endif |
| if (useWallClock()) { |
| *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen; |
| } |
| } |
| |
| /* |
| * We just did something with a method. Emit a record. |
| * |
| * Multiple threads may be banging on this all at once. We use atomic ops |
| * rather than mutexes for speed. |
| */ |
| void dvmMethodTraceAdd(Thread* self, const Method* method, int action, |
| u4 cpuClockDiff, u4 wallClockDiff) |
| { |
| MethodTraceState* state = &gDvm.methodTrace; |
| u4 methodVal; |
| int oldOffset, newOffset; |
| u1* ptr; |
| |
| assert(method != NULL); |
| |
| /* |
| * Advance "curOffset" atomically. |
| */ |
| do { |
| oldOffset = state->curOffset; |
| newOffset = oldOffset + state->recordSize; |
| if (newOffset > state->bufferSize) { |
| state->overflow = true; |
| return; |
| } |
| } while (android_atomic_release_cas(oldOffset, newOffset, |
| &state->curOffset) != 0); |
| |
| //assert(METHOD_ACTION((u4) method) == 0); |
| |
| methodVal = METHOD_COMBINE((u4) method, action); |
| |
| /* |
| * Write data into "oldOffset". |
| */ |
| ptr = state->buf + oldOffset; |
| *ptr++ = (u1) self->threadId; |
| *ptr++ = (u1) (self->threadId >> 8); |
| *ptr++ = (u1) methodVal; |
| *ptr++ = (u1) (methodVal >> 8); |
| *ptr++ = (u1) (methodVal >> 16); |
| *ptr++ = (u1) (methodVal >> 24); |
| |
| #if defined(HAVE_POSIX_CLOCKS) |
| if (useThreadCpuClock()) { |
| *ptr++ = (u1) cpuClockDiff; |
| *ptr++ = (u1) (cpuClockDiff >> 8); |
| *ptr++ = (u1) (cpuClockDiff >> 16); |
| *ptr++ = (u1) (cpuClockDiff >> 24); |
| } |
| #endif |
| |
| if (useWallClock()) { |
| *ptr++ = (u1) wallClockDiff; |
| *ptr++ = (u1) (wallClockDiff >> 8); |
| *ptr++ = (u1) (wallClockDiff >> 16); |
| *ptr++ = (u1) (wallClockDiff >> 24); |
| } |
| } |
| |
| |
| /* |
| * Register the METHOD_TRACE_ENTER action for the fast interpreter and |
| * JIT'ed code. |
| */ |
| void dvmFastMethodTraceEnter(const Method* method, Thread* self) |
| { |
| if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) { |
| u4 cpuClockDiff = 0; |
| u4 wallClockDiff = 0; |
| dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff); |
| dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff, |
| wallClockDiff); |
| } |
| } |
| |
| /* |
| * Register the METHOD_TRACE_EXIT action for the fast interpreter and |
| * JIT'ed code for methods. The about-to-return callee method can be |
| * retrieved from self->interpSave.method. |
| */ |
| void dvmFastMethodTraceExit(Thread* self) |
| { |
| if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) { |
| u4 cpuClockDiff = 0; |
| u4 wallClockDiff = 0; |
| dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff); |
| dvmMethodTraceAdd(self, self->interpSave.method, |
| METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff); |
| } |
| } |
| |
| /* |
| * Register the METHOD_TRACE_EXIT action for the fast interpreter and |
| * JIT'ed code for JNI methods. The about-to-return JNI callee method is passed |
| * in explicitly. Also used for inline-execute. |
| */ |
| void dvmFastNativeMethodTraceExit(const Method* method, Thread* self) |
| { |
| if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) { |
| u4 cpuClockDiff = 0; |
| u4 wallClockDiff = 0; |
| dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff); |
| dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff, |
| wallClockDiff); |
| } |
| } |
| |
| /* |
| * We just did something with a method. Emit a record by setting a value |
| * in a magic memory location. |
| */ |
| void dvmEmitEmulatorTrace(const Method* method, int action) |
| { |
| #ifdef UPDATE_MAGIC_PAGE |
| /* |
| * We store the address of the Dalvik bytecodes to the memory-mapped |
| * trace page for normal methods. We also trace calls to native |
| * functions by storing the address of the native function to the |
| * trace page. |
| * Abstract methods don't have any bytecodes, so we don't trace them. |
| * (Abstract methods are never called, but in Dalvik they can be |
| * because we do a "late trap" to a native method to generate the |
| * abstract method exception.) |
| */ |
| if (dvmIsAbstractMethod(method)) |
| return; |
| |
| u4* pMagic = (u4*) gDvm.emulatorTracePage; |
| u4 addr; |
| |
| if (dvmIsNativeMethod(method)) { |
| /* |
| * The "action" parameter is one of: |
| * 0 = ENTER |
| * 1 = EXIT |
| * 2 = UNROLL |
| * To help the trace tools reconstruct the runtime stack containing |
| * a mix of normal plus native methods, we add 4 to the action if this |
| * is a native method. |
| */ |
| action += 4; |
| |
| /* |
| * Get the address of the native function. |
| * This isn't the right address -- how do I get it? |
| * Fortunately, the trace tools can get by without the address, but |
| * it would be nice to fix this. |
| */ |
| addr = (u4) method->nativeFunc; |
| } else { |
| /* |
| * The dexlist output shows the &DexCode.insns offset value, which |
| * is offset from the start of the base DEX header. Method.insns |
| * is the absolute address, effectively offset from the start of |
| * the optimized DEX header. We either need to return the |
| * optimized DEX base file address offset by the right amount, or |
| * take the "real" address and subtract off the size of the |
| * optimized DEX header. |
| * |
| * Would be nice to factor this out at dexlist time, but we can't count |
| * on having access to the correct optimized DEX file. |
| */ |
| assert(method->insns != NULL); |
| const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader; |
| addr = (u4) method->insns - pOptHdr->dexOffset; |
| } |
| |
| *(pMagic+action) = addr; |
| LOGVV("Set %p = 0x%08x (%s.%s)", |
| pMagic+action, addr, method->clazz->descriptor, method->name); |
| #endif |
| } |
| |
| /* |
| * The GC calls this when it's about to start. We add a marker to the |
| * trace output so the tool can exclude the GC cost from the results. |
| */ |
| void dvmMethodTraceGCBegin() |
| { |
| TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceGcMethod); |
| } |
| void dvmMethodTraceGCEnd() |
| { |
| TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceGcMethod); |
| } |
| |
| /* |
| * The class loader calls this when it's loading or initializing a class. |
| */ |
| void dvmMethodTraceClassPrepBegin() |
| { |
| TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod); |
| } |
| void dvmMethodTraceClassPrepEnd() |
| { |
| TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod); |
| } |
| |
| |
| /* |
| * Enable emulator trace info. |
| */ |
| void dvmEmulatorTraceStart() |
| { |
| /* If we could not map the emulator trace page, then do not enable tracing */ |
| if (gDvm.emulatorTracePage == NULL) |
| return; |
| |
| /* in theory we should make this an atomic inc; in practice not important */ |
| gDvm.emulatorTraceEnableCount++; |
| if (gDvm.emulatorTraceEnableCount == 1) |
| ALOGD("--- emulator method traces enabled"); |
| updateActiveProfilers(kSubModeEmulatorTrace, true); |
| } |
| |
| /* |
| * Disable emulator trace info. |
| */ |
| void dvmEmulatorTraceStop() |
| { |
| if (gDvm.emulatorTraceEnableCount == 0) { |
| ALOGE("ERROR: emulator tracing not enabled"); |
| return; |
| } |
| /* in theory we should make this an atomic inc; in practice not important */ |
| gDvm.emulatorTraceEnableCount--; |
| if (gDvm.emulatorTraceEnableCount == 0) |
| ALOGD("--- emulator method traces disabled"); |
| updateActiveProfilers(kSubModeEmulatorTrace, |
| (gDvm.emulatorTraceEnableCount != 0)); |
| } |
| |
| |
| /* |
| * Start instruction counting. |
| */ |
| void dvmStartInstructionCounting() |
| { |
| /* in theory we should make this an atomic inc; in practice not important */ |
| gDvm.instructionCountEnableCount++; |
| updateActiveProfilers(kSubModeInstCounting, true); |
| } |
| |
| /* |
| * Stop instruction counting. |
| */ |
| void dvmStopInstructionCounting() |
| { |
| if (gDvm.instructionCountEnableCount == 0) { |
| ALOGE("ERROR: instruction counting not enabled"); |
| dvmAbort(); |
| } |
| gDvm.instructionCountEnableCount--; |
| updateActiveProfilers(kSubModeInstCounting, |
| (gDvm.instructionCountEnableCount != 0)); |
| } |
| |
| |
| /* |
| * Start alloc counting. Note this doesn't affect the "active profilers" |
| * count, since the interpreter loop is not involved. |
| */ |
| void dvmStartAllocCounting() |
| { |
| gDvm.allocProf.enabled = true; |
| } |
| |
| /* |
| * Stop alloc counting. |
| */ |
| void dvmStopAllocCounting() |
| { |
| gDvm.allocProf.enabled = false; |
| } |