Clean up sampling tracing.
- Moved the per-thread stack trace and clock base maps out of Trace
  and into fields on Thread.
- Added a temporary stack trace field to reduce the number of stack
  traces allocated and freed during sampling.
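The reuse amounts to a single-slot cache: instead of deleting a
thread's superseded trace, the sampler parks it in temp_stack_trace_
and hands it back on the next allocation. A minimal standalone sketch
of the pattern, with std::unique_ptr and int standing in for ART's
UniquePtr and mirror::ArtMethod*:

    #include <memory>
    #include <vector>

    // Single-slot cache: holds at most one retired trace vector.
    static std::unique_ptr<std::vector<int>> temp_stack_trace;

    std::vector<int>* AllocStackTrace() {
      // Hand back the parked vector if there is one, else allocate fresh.
      return temp_stack_trace ? temp_stack_trace.release()
                              : new std::vector<int>();
    }

    void FreeStackTrace(std::vector<int>* stack_trace) {
      // clear() empties the vector but keeps its capacity, so the parked
      // allocation is reused by the next sample.
      stack_trace->clear();
      temp_stack_trace.reset(stack_trace);
    }

Only the sampling thread allocates and frees these, so an
unsynchronized slot is safe, and since each sample frees at most one
old trace before the next allocation, one slot is enough to reach a
steady state with no allocation churn.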
Change-Id: I23fe25a85ad2894cc6917f87d4046cdececf0739
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c4077de..8fc744d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -961,6 +961,8 @@
jpeer_(NULL),
stack_begin_(NULL),
stack_size_(0),
+ stack_trace_sample_(NULL),
+ trace_clock_base_(0),
thin_lock_id_(0),
tid_(0),
wait_mutex_(new Mutex("a thread wait mutex")),
@@ -1077,6 +1079,7 @@
delete debug_invoke_req_;
delete instrumentation_stack_;
delete name_;
+ delete stack_trace_sample_;
TearDownAlternateSignalStack();
}
diff --git a/runtime/thread.h b/runtime/thread.h
index f16695d..40e3f5f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -552,6 +552,22 @@
return instrumentation_stack_;
}
+ std::vector<mirror::ArtMethod*>* GetStackTraceSample() const {
+ return stack_trace_sample_;
+ }
+
+ void SetStackTraceSample(std::vector<mirror::ArtMethod*>* sample) {
+ stack_trace_sample_ = sample;
+ }
+
+ uint64_t GetTraceClockBase() const {
+ return trace_clock_base_;
+ }
+
+ void SetTraceClockBase(uint64_t clock_base) {
+ trace_clock_base_ = clock_base;
+ }
+
BaseMutex* GetHeldMutex(LockLevel level) const {
return held_mutexes_[level];
}
@@ -698,6 +714,12 @@
// Size of the stack
size_t stack_size_;
+ // Pointer to the previous stack trace captured by the sampling profiler.
+ std::vector<mirror::ArtMethod*>* stack_trace_sample_;
+
+ // The clock base used for tracing.
+ uint64_t trace_clock_base_;
+
// Thin lock thread id. This is a small integer used by the thin lock implementation.
// This is not to be confused with the native thread's tid, nor is it the value returned
// by java.lang.Thread.getId --- this is a distinct value, used only for locking. One
diff --git a/runtime/trace.cc b/runtime/trace.cc
index d435129..7dbd35a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -87,7 +87,7 @@
class BuildStackTraceVisitor : public StackVisitor {
public:
explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, NULL),
- method_trace_(new std::vector<mirror::ArtMethod*>) {}
+ method_trace_(Trace::AllocStackTrace()) {}
bool VisitFrame() {
mirror::ArtMethod* m = GetMethod();
@@ -126,6 +126,7 @@
bool Trace::sampling_enabled_ = true;
uint32_t Trace::sampling_interval_us_ = 10000;
pthread_t Trace::sampling_pthread_ = 0U;
+UniquePtr<std::vector<mirror::ArtMethod*> > Trace::temp_stack_trace_;
static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) {
return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask);
@@ -142,12 +143,25 @@
return tmid;
}
+std::vector<mirror::ArtMethod*>* Trace::AllocStackTrace() {
+ if (temp_stack_trace_.get() != NULL) {
+ return temp_stack_trace_.release();
+ } else {
+ return new std::vector<mirror::ArtMethod*>();
+ }
+}
+
+void Trace::FreeStackTrace(std::vector<mirror::ArtMethod*>* stack_trace) {
+ stack_trace->clear();
+ temp_stack_trace_.reset(stack_trace);
+}
+
void Trace::SetDefaultClockSource(ProfilerClockSource clock_source) {
#if defined(HAVE_POSIX_CLOCKS)
default_clock_source_ = clock_source;
#else
if (clock_source != kProfilerClockSourceWall) {
- LOG(WARNING) << "Ignoring tracing request to use ";
+ LOG(WARNING) << "Ignoring tracing request to use CPU time.";
}
#endif
}
@@ -181,7 +195,8 @@
}
}
-static uint32_t GetClockOverhead(Trace* trace) {
+// Compute the average time taken to measure the clocks, in nanoseconds.
+static uint32_t GetClockOverheadNanoSeconds(Trace* trace) {
Thread* self = Thread::Current();
uint64_t start = self->GetCpuMicroTime();
@@ -196,34 +211,34 @@
MeasureClockOverhead(trace);
}
- uint64_t elapsed = self->GetCpuMicroTime() - start;
- return uint32_t (elapsed / 32);
+ uint64_t elapsed_us = self->GetCpuMicroTime() - start;
+ return static_cast<uint32_t>(elapsed_us / 32);
}
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
static void Append2LE(uint8_t* buf, uint16_t val) {
- *buf++ = (uint8_t) val;
- *buf++ = (uint8_t) (val >> 8);
+ *buf++ = static_cast<uint8_t>(val);
+ *buf++ = static_cast<uint8_t>(val >> 8);
}
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
static void Append4LE(uint8_t* buf, uint32_t val) {
- *buf++ = (uint8_t) val;
- *buf++ = (uint8_t) (val >> 8);
- *buf++ = (uint8_t) (val >> 16);
- *buf++ = (uint8_t) (val >> 24);
+ *buf++ = static_cast<uint8_t>(val);
+ *buf++ = static_cast<uint8_t>(val >> 8);
+ *buf++ = static_cast<uint8_t>(val >> 16);
+ *buf++ = static_cast<uint8_t>(val >> 24);
}
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
static void Append8LE(uint8_t* buf, uint64_t val) {
- *buf++ = (uint8_t) val;
- *buf++ = (uint8_t) (val >> 8);
- *buf++ = (uint8_t) (val >> 16);
- *buf++ = (uint8_t) (val >> 24);
- *buf++ = (uint8_t) (val >> 32);
- *buf++ = (uint8_t) (val >> 40);
- *buf++ = (uint8_t) (val >> 48);
- *buf++ = (uint8_t) (val >> 56);
+ *buf++ = static_cast<uint8_t>(val);
+ *buf++ = static_cast<uint8_t>(val >> 8);
+ *buf++ = static_cast<uint8_t>(val >> 16);
+ *buf++ = static_cast<uint8_t>(val >> 24);
+ *buf++ = static_cast<uint8_t>(val >> 32);
+ *buf++ = static_cast<uint8_t>(val >> 40);
+ *buf++ = static_cast<uint8_t>(val >> 48);
+ *buf++ = static_cast<uint8_t>(val >> 56);
}
static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
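The Append*LE changes above are cast style only; behavior is
unchanged, with the low byte always written first regardless of host
endianness. A standalone check of the 32-bit case, assuming the same
shift-and-truncate scheme:

    #include <cassert>
    #include <cstdint>

    static void Append4LE(uint8_t* buf, uint32_t val) {
      *buf++ = static_cast<uint8_t>(val);
      *buf++ = static_cast<uint8_t>(val >> 8);
      *buf++ = static_cast<uint8_t>(val >> 16);
      *buf++ = static_cast<uint8_t>(val >> 24);
    }

    int main() {
      uint8_t buf[4];
      Append4LE(buf, 0x12345678u);
      // Low byte lands first, independent of host endianness.
      assert(buf[0] == 0x78 && buf[1] == 0x56 &&
             buf[2] == 0x34 && buf[3] == 0x12);
      return 0;
    }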
@@ -234,14 +249,22 @@
the_trace->CompareAndUpdateStackTrace(thread, stack_trace);
}
+static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg) {
+ thread->SetTraceClockBase(0);
+ std::vector<mirror::ArtMethod*>* stack_trace = thread->GetStackTraceSample();
+ thread->SetStackTraceSample(NULL);
+ delete stack_trace;
+}
+
void Trace::CompareAndUpdateStackTrace(Thread* thread,
std::vector<mirror::ArtMethod*>* stack_trace) {
CHECK_EQ(pthread_self(), sampling_pthread_);
- SafeMap<Thread*, std::vector<mirror::ArtMethod*>*>::iterator map_it = thread_stack_trace_map_.find(thread);
- if (map_it == thread_stack_trace_map_.end()) {
- // If there's no existing stack trace in the map for this thread, log an entry event for all
+ std::vector<mirror::ArtMethod*>* old_stack_trace = thread->GetStackTraceSample();
+ // Update the thread's stack trace sample.
+ thread->SetStackTraceSample(stack_trace);
+ if (old_stack_trace == NULL) {
+ // If there's no previous stack trace sample for this thread, log an entry event for all
// methods in the trace.
- thread_stack_trace_map_.Put(thread, stack_trace);
for (std::vector<mirror::ArtMethod*>::reverse_iterator rit = stack_trace->rbegin();
rit != stack_trace->rend(); ++rit) {
LogMethodTraceEvent(thread, *rit, instrumentation::Instrumentation::kMethodEntered);
@@ -249,11 +272,8 @@
} else {
// If there's a previous stack trace for this thread, diff the traces and emit entry and exit
// events accordingly.
- std::vector<mirror::ArtMethod*>* old_stack_trace = map_it->second;
- thread_stack_trace_map_.Overwrite(thread, stack_trace);
std::vector<mirror::ArtMethod*>::reverse_iterator old_rit = old_stack_trace->rbegin();
std::vector<mirror::ArtMethod*>::reverse_iterator rit = stack_trace->rbegin();
-
// Iterate bottom-up over both traces until there's a difference between them.
while (old_rit != old_stack_trace->rend() && rit != stack_trace->rend() && *old_rit == *rit) {
old_rit++;
@@ -268,7 +288,7 @@
for (; rit != stack_trace->rend(); ++rit) {
LogMethodTraceEvent(thread, *rit, instrumentation::Instrumentation::kMethodEntered);
}
- delete old_stack_trace;
+ FreeStackTrace(old_stack_trace);
}
}
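Concretely, the diff walks both traces bottom-up (traces are stored
innermost frame first, so reverse iteration starts at the outermost
frame), strips the shared prefix, then emits exit events for what
remains of the old trace and entry events for what remains of the new
one. With old = run->a->b and new = run->a->c, that logs an exit for b
and an entry for c. A standalone sketch with strings in place of
mirror::ArtMethod*; the exit loop below assumes the usual
reverse_iterator::base() idiom, since that part of the function is
outside the hunk above:

    #include <iostream>
    #include <string>
    #include <vector>

    // Innermost frame first, the order a stack visitor pushes them.
    using Trace = std::vector<std::string>;

    void DiffTraces(const Trace& old_trace, const Trace& new_trace) {
      Trace::const_reverse_iterator old_rit = old_trace.rbegin();
      Trace::const_reverse_iterator rit = new_trace.rbegin();
      // Skip the shared bottom-up prefix.
      while (old_rit != old_trace.rend() && rit != new_trace.rend() &&
             *old_rit == *rit) {
        ++old_rit;
        ++rit;
      }
      // Frames only in the old trace were exited, innermost first.
      for (Trace::const_iterator it = old_trace.begin();
           it != old_rit.base(); ++it) {
        std::cout << "exit  " << *it << "\n";
      }
      // Frames only in the new trace were entered, outermost first.
      for (; rit != new_trace.rend(); ++rit) {
        std::cout << "enter " << *rit << "\n";
      }
    }

    int main() {
      // Old stack run->a->b vs. new stack run->a->c:
      // prints "exit  b" then "enter c".
      DiffTraces({"b", "a", "run"}, {"c", "a", "run"});
      return 0;
    }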
@@ -279,7 +299,7 @@
while (true) {
usleep(sampling_interval_us_);
-
+ ATRACE_BEGIN("Profile sampling");
Thread* self = Thread::Current();
Trace* the_trace;
{
@@ -296,6 +316,7 @@
runtime->GetThreadList()->ForEach(GetSample, the_trace);
}
runtime->GetThreadList()->ResumeAll();
+ ATRACE_END();
}
runtime->DetachCurrentThread();
@@ -379,7 +400,10 @@
if (the_trace != NULL) {
the_trace->FinishTracing();
- if (!sampling_enabled_) {
+ if (sampling_enabled_) {
+ MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+ runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, NULL);
+ } else {
runtime->GetInstrumentation()->RemoveListener(the_trace,
instrumentation::Instrumentation::kMethodEntered |
instrumentation::Instrumentation::kMethodExited |
@@ -427,7 +451,6 @@
Trace::~Trace() {
CHECK_EQ(sampling_pthread_, static_cast<pthread_t>(0U));
- STLDeleteValues(&thread_stack_trace_map_);
}
static void DumpBuf(uint8_t* buf, size_t buf_size, ProfilerClockSource clock_source)
@@ -449,7 +472,7 @@
uint64_t elapsed = MicroTime() - start_time_;
size_t final_offset = cur_offset_;
- uint32_t clock_overhead = GetClockOverhead(this);
+ uint32_t clock_overhead_ns = GetClockOverheadNanoSeconds(this);
if ((flags_ & kTraceCountAllocs) != 0) {
Runtime::Current()->SetStatsEnabled(false);
@@ -475,7 +498,7 @@
os << StringPrintf("elapsed-time-usec=%llu\n", elapsed);
size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
os << StringPrintf("num-method-calls=%zd\n", num_records);
- os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead);
+ os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns);
os << StringPrintf("vm=art\n");
if ((flags_ & kTraceCountAllocs) != 0) {
os << StringPrintf("alloc-count=%d\n", Runtime::Current()->GetStat(KIND_ALLOCATED_OBJECTS));
@@ -578,16 +601,14 @@
ptr += 6;
if (UseThreadCpuClock()) {
- // TODO: this isn't vaguely thread safe.
- SafeMap<Thread*, uint64_t>::iterator it = thread_clock_base_map_.find(thread);
+ uint64_t clock_base = thread->GetTraceClockBase();
uint32_t thread_clock_diff = 0;
- if (UNLIKELY(it == thread_clock_base_map_.end())) {
- // First event, the diff is 0, record the base time in the map.
+ if (UNLIKELY(clock_base == 0)) {
+ // First event, record the base time on the thread.
uint64_t time = thread->GetCpuMicroTime();
- thread_clock_base_map_.Put(thread, time);
+ thread->SetTraceClockBase(time);
} else {
- uint64_t thread_clock_base = it->second;
- thread_clock_diff = thread->GetCpuMicroTime() - thread_clock_base;
+ thread_clock_diff = thread->GetCpuMicroTime() - clock_base;
}
Append4LE(ptr, thread_clock_diff);
ptr += 4;
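The same field-instead-of-map move applies to the thread CPU clock
base: zero doubles as "not yet recorded", so a thread's first event
stores the base and reports a zero diff, and later events report the
32-bit microsecond delta. A standalone sketch of that scheme, where
GetCpuMicros is a stand-in for Thread::GetCpuMicroTime and std::clock
approximates a CPU clock:

    #include <cstdint>
    #include <ctime>

    struct Thread {
      uint64_t trace_clock_base = 0;  // 0 means "no base recorded yet"
    };

    // Stand-in for Thread::GetCpuMicroTime(); std::clock() is process
    // CPU time, which is close enough for the sketch.
    static uint64_t GetCpuMicros() {
      return static_cast<uint64_t>(std::clock()) * 1000000 / CLOCKS_PER_SEC;
    }

    static uint32_t ThreadClockDiff(Thread* thread) {
      uint64_t clock_base = thread->trace_clock_base;
      if (clock_base == 0) {
        // First event: record the base; this event's diff stays 0.
        thread->trace_clock_base = GetCpuMicros();
        return 0;
      }
      // Later events: 32-bit delta from the recorded base.
      return static_cast<uint32_t>(GetCpuMicros() - clock_base);
    }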
diff --git a/runtime/trace.h b/runtime/trace.h
index 5733929..9fe2cc6 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -83,6 +83,11 @@
mirror::Throwable* exception_object)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ // Reuse an old stack trace if it exists, otherwise allocate a new one.
+ static std::vector<mirror::ArtMethod*>* AllocStackTrace();
+ // Clear and store an old stack trace for later use.
+ static void FreeStackTrace(std::vector<mirror::ArtMethod*>* stack_trace);
+
~Trace();
private:
@@ -116,11 +121,8 @@
// Sampling thread, non-zero when sampling.
static pthread_t sampling_pthread_;
- // Maps a thread to its most recent stack trace sample.
- SafeMap<Thread*, std::vector<mirror::ArtMethod*>*> thread_stack_trace_map_;
-
- // Maps a thread to its clock base.
- SafeMap<Thread*, uint64_t> thread_clock_base_map_;
+ // Used to remember an unused stack trace to avoid re-allocation during sampling.
+ static UniquePtr<std::vector<mirror::ArtMethod*> > temp_stack_trace_;
// File to write trace data out to, NULL if direct to ddms.
UniquePtr<File> trace_file_;