| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/tracked_objects.h" |
| |
| #include <limits.h> |
| #include <stdlib.h> |
| |
| #include "base/atomicops.h" |
| #include "base/base_switches.h" |
| #include "base/command_line.h" |
| #include "base/compiler_specific.h" |
| #include "base/debug/leak_annotations.h" |
| #include "base/logging.h" |
| #include "base/process/process_handle.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/third_party/valgrind/memcheck.h" |
| #include "base/tracking_info.h" |
| #include "build/build_config.h" |
| |
| using base::TimeDelta; |
| |
| namespace tracked_objects { |
| |
| namespace { |
| // When ThreadData is first initialized, should we start in an ACTIVE state to |
| // record all of the startup-time tasks, or should we start up DEACTIVATED, so |
// that we only record after parsing the command line flag --enable-tracking?
| // Note that the flag may force either state, so this really controls only the |
| // period of time up until that flag is parsed. If there is no flag seen, then |
| // this state may prevail for much or all of the process lifetime. |
| const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE; |
| |
| // Possible states of the profiler timing enabledness. |
| enum { |
| UNDEFINED_TIMING, |
| ENABLED_TIMING, |
| DISABLED_TIMING, |
| }; |
| |
| // State of the profiler timing enabledness. |
| base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING; |
| |
| // Returns whether profiler timing is enabled. The default is true, but this |
| // may be overridden by a command-line flag. Some platforms may |
| // programmatically set this command-line flag to the "off" value if it's not |
| // specified. |
| // This in turn can be overridden by explicitly calling |
| // ThreadData::EnableProfilerTiming, say, based on a field trial. |
| inline bool IsProfilerTimingEnabled() { |
  // Reading |g_profiler_timing_enabled| is done without a barrier because a
  // racy double-initialization is not a problem: any threads that race here
  // compute and store the same value. A barrier, by contrast, would be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
| base::subtle::Atomic32 current_timing_enabled = |
| base::subtle::NoBarrier_Load(&g_profiler_timing_enabled); |
| if (current_timing_enabled == UNDEFINED_TIMING) { |
| if (!base::CommandLine::InitializedForCurrentProcess()) |
| return true; |
| current_timing_enabled = |
| (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( |
| switches::kProfilerTiming) == |
| switches::kProfilerTimingDisabledValue) |
| ? DISABLED_TIMING |
| : ENABLED_TIMING; |
| base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, |
| current_timing_enabled); |
| } |
| return current_timing_enabled == ENABLED_TIMING; |
| } |
| |
| } // namespace |
| |
| //------------------------------------------------------------------------------ |
| // DeathData tallies durations when a death takes place. |
| |
| DeathData::DeathData() |
| : count_(0), |
| sample_probability_count_(0), |
| run_duration_sum_(0), |
| queue_duration_sum_(0), |
| run_duration_max_(0), |
| queue_duration_max_(0), |
| run_duration_sample_(0), |
| queue_duration_sample_(0), |
| last_phase_snapshot_(nullptr) { |
| } |
| |
| DeathData::DeathData(const DeathData& other) |
| : count_(other.count_), |
| sample_probability_count_(other.sample_probability_count_), |
| run_duration_sum_(other.run_duration_sum_), |
| queue_duration_sum_(other.queue_duration_sum_), |
| run_duration_max_(other.run_duration_max_), |
| queue_duration_max_(other.queue_duration_max_), |
| run_duration_sample_(other.run_duration_sample_), |
| queue_duration_sample_(other.queue_duration_sample_), |
| last_phase_snapshot_(nullptr) { |
| // This constructor will be used by std::map when adding new DeathData values |
  // to the map. At that point, last_phase_snapshot_ is still nullptr, so we don't
| // need to worry about ownership transfer. |
| DCHECK(other.last_phase_snapshot_ == nullptr); |
| } |
| |
| DeathData::~DeathData() { |
| while (last_phase_snapshot_) { |
| const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_; |
| last_phase_snapshot_ = snapshot->prev; |
| delete snapshot; |
| } |
| } |
| |
| // TODO(jar): I need to see if this macro to optimize branching is worth using. |
| // |
// This macro has no branching, so it avoids branch-misprediction costs, and is
// equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Similar branch-free code for computing a max is widely documented.
| #define CONDITIONAL_ASSIGN(assign_it, target, source) \ |
| ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it)) |
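// A worked example of the bit trick: when assign_it is true, the mask
// -static_cast<int32_t>(true) is all ones, so
//   target ^= (target ^ source) & ~0;  // target becomes source.
// When assign_it is false, the mask is zero, so
//   target ^= (target ^ source) & 0;   // target is unchanged.
// Note that, as of this writing, the macro is not yet used in this file.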
| |
| void DeathData::RecordDeath(const int32_t queue_duration, |
| const int32_t run_duration, |
| const uint32_t random_number) { |
  // We'll just clamp at INT_MAX, but the UI should note when clamping occurs.
| if (count_ < INT_MAX) |
| base::subtle::NoBarrier_Store(&count_, count_ + 1); |
| |
| int sample_probability_count = |
| base::subtle::NoBarrier_Load(&sample_probability_count_); |
| if (sample_probability_count < INT_MAX) |
| ++sample_probability_count; |
| base::subtle::NoBarrier_Store(&sample_probability_count_, |
| sample_probability_count); |
| |
| base::subtle::NoBarrier_Store(&queue_duration_sum_, |
| queue_duration_sum_ + queue_duration); |
| base::subtle::NoBarrier_Store(&run_duration_sum_, |
| run_duration_sum_ + run_duration); |
| |
| if (queue_duration_max() < queue_duration) |
| base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration); |
| if (run_duration_max() < run_duration) |
| base::subtle::NoBarrier_Store(&run_duration_max_, run_duration); |
| |
| // Take a uniformly distributed sample over all durations ever supplied during |
| // the current profiling phase. |
| // The probability that we (instead) use this new sample is |
| // 1/sample_probability_count_. This results in a completely uniform selection |
| // of the sample (at least when we don't clamp sample_probability_count_... |
| // but that should be inconsequentially likely). We ignore the fact that we |
| // correlated our selection of a sample to the run and queue times (i.e., we |
| // used them to generate random_number). |
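  // (This is reservoir sampling with a reservoir of size one: the k-th death
  // in a phase replaces the stored sample with probability 1/k, so after n
  // deaths each duration pair has been retained with probability 1/n.)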
| CHECK_GT(sample_probability_count, 0); |
| if (0 == (random_number % sample_probability_count)) { |
| base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration); |
| base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration); |
| } |
| } |
| |
| void DeathData::OnProfilingPhaseCompleted(int profiling_phase) { |
| // Snapshotting and storing current state. |
| last_phase_snapshot_ = new DeathDataPhaseSnapshot( |
| profiling_phase, count(), run_duration_sum(), run_duration_max(), |
| run_duration_sample(), queue_duration_sum(), queue_duration_max(), |
| queue_duration_sample(), last_phase_snapshot_); |
| |
| // Not touching fields for which a delta can be computed by comparing with a |
| // snapshot from the previous phase. Resetting other fields. Sample values |
| // will be reset upon next death recording because sample_probability_count_ |
| // is set to 0. |
| // We avoid resetting to 0 in favor of deltas whenever possible. The reason |
| // is that for incrementable fields, resetting to 0 from the snapshot thread |
| // potentially in parallel with incrementing in the death thread may result in |
| // significant data corruption that has a potential to grow with time. Not |
| // resetting incrementable fields and using deltas will cause any |
| // off-by-little corruptions to be likely fixed at the next snapshot. |
| // The max values are not incrementable, and cannot be deduced using deltas |
| // for a given phase. Hence, we have to reset them to 0. But the potential |
| // damage is limited to getting the previous phase's max to apply for the next |
| // phase, and the error doesn't have a potential to keep growing with new |
| // resets. |
| // sample_probability_count_ is incrementable, but must be reset to 0 at the |
| // phase end, so that we start a new uniformly randomized sample selection |
| // after the reset. These fields are updated using atomics. However, race |
| // conditions are possible since these are updated individually and not |
| // together atomically, resulting in the values being mutually inconsistent. |
| // The damage is limited to selecting a wrong sample, which is not something |
| // that can cause accumulating or cascading effects. |
| // If there were no inconsistencies caused by race conditions, we never send a |
| // sample for the previous phase in the next phase's snapshot because |
| // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count. |
| base::subtle::NoBarrier_Store(&sample_probability_count_, 0); |
| base::subtle::NoBarrier_Store(&run_duration_max_, 0); |
| base::subtle::NoBarrier_Store(&queue_duration_max_, 0); |
| } |
| |
| //------------------------------------------------------------------------------ |
| DeathDataSnapshot::DeathDataSnapshot() |
| : count(-1), |
| run_duration_sum(-1), |
| run_duration_max(-1), |
| run_duration_sample(-1), |
| queue_duration_sum(-1), |
| queue_duration_max(-1), |
| queue_duration_sample(-1) { |
| } |
| |
| DeathDataSnapshot::DeathDataSnapshot(int count, |
| int32_t run_duration_sum, |
| int32_t run_duration_max, |
| int32_t run_duration_sample, |
| int32_t queue_duration_sum, |
| int32_t queue_duration_max, |
| int32_t queue_duration_sample) |
| : count(count), |
| run_duration_sum(run_duration_sum), |
| run_duration_max(run_duration_max), |
| run_duration_sample(run_duration_sample), |
| queue_duration_sum(queue_duration_sum), |
| queue_duration_max(queue_duration_max), |
| queue_duration_sample(queue_duration_sample) {} |
| |
| DeathDataSnapshot::~DeathDataSnapshot() { |
| } |
| |
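// Calculates differences between this snapshot and an |older| one of the same
// birth. Counts and duration sums are differenced; max and sample values are
// passed through as-is because they are reset at each phase boundary (see
// DeathData::OnProfilingPhaseCompleted) and thus already describe only the
// current phase.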
| DeathDataSnapshot DeathDataSnapshot::Delta( |
| const DeathDataSnapshot& older) const { |
| return DeathDataSnapshot(count - older.count, |
| run_duration_sum - older.run_duration_sum, |
| run_duration_max, run_duration_sample, |
| queue_duration_sum - older.queue_duration_sum, |
| queue_duration_max, queue_duration_sample); |
| } |
| |
| //------------------------------------------------------------------------------ |
| BirthOnThread::BirthOnThread(const Location& location, |
| const ThreadData& current) |
| : location_(location), |
| birth_thread_(¤t) { |
| } |
| |
| //------------------------------------------------------------------------------ |
| BirthOnThreadSnapshot::BirthOnThreadSnapshot() { |
| } |
| |
| BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth) |
| : location(birth.location()), |
| thread_name(birth.birth_thread()->thread_name()) { |
| } |
| |
| BirthOnThreadSnapshot::~BirthOnThreadSnapshot() { |
| } |
| |
| //------------------------------------------------------------------------------ |
| Births::Births(const Location& location, const ThreadData& current) |
| : BirthOnThread(location, current), |
| birth_count_(1) { } |
| |
| int Births::birth_count() const { return birth_count_; } |
| |
| void Births::RecordBirth() { ++birth_count_; } |
| |
| //------------------------------------------------------------------------------ |
| // ThreadData maintains the central data for all births and deaths on a single |
| // thread. |
| |
| // TODO(jar): We should pull all these static vars together, into a struct, and |
| // optimize layout so that we benefit from locality of reference during accesses |
| // to them. |
| |
| // static |
| ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL; |
| |
| // A TLS slot which points to the ThreadData instance for the current thread. |
| // We do a fake initialization here (zeroing out data), and then the real |
| // in-place construction happens when we call tls_index_.Initialize(). |
| // static |
| base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER; |
| |
| // static |
| int ThreadData::worker_thread_data_creation_count_ = 0; |
| |
| // static |
| int ThreadData::cleanup_count_ = 0; |
| |
| // static |
| int ThreadData::incarnation_counter_ = 0; |
| |
| // static |
| ThreadData* ThreadData::all_thread_data_list_head_ = NULL; |
| |
| // static |
| ThreadData* ThreadData::first_retired_worker_ = NULL; |
| |
| // static |
| base::LazyInstance<base::Lock>::Leaky |
| ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER; |
| |
| // static |
| base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED; |
| |
| ThreadData::ThreadData(const std::string& suggested_name) |
| : next_(NULL), |
| next_retired_worker_(NULL), |
| worker_thread_number_(0), |
| incarnation_count_for_pool_(-1), |
| current_stopwatch_(NULL) { |
| DCHECK_GE(suggested_name.size(), 0u); |
| thread_name_ = suggested_name; |
| PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. |
| } |
| |
| ThreadData::ThreadData(int thread_number) |
| : next_(NULL), |
| next_retired_worker_(NULL), |
| worker_thread_number_(thread_number), |
| incarnation_count_for_pool_(-1), |
| current_stopwatch_(NULL) { |
| CHECK_GT(thread_number, 0); |
| base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number); |
| PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. |
| } |
| |
| ThreadData::~ThreadData() { |
| } |
| |
| void ThreadData::PushToHeadOfList() { |
  // Toss in a hint of randomness (atop the uninitialized value).
| (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_, |
| sizeof(random_number_)); |
| MSAN_UNPOISON(&random_number_, sizeof(random_number_)); |
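  // Subtracting a null ThreadData* from |this| yields the object's address
  // expressed in ThreadData-sized units, folding the address into the seed
  // without casting the pointer to an integer type.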
| random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0)); |
| random_number_ ^= (Now() - TrackedTime()).InMilliseconds(); |
| |
| DCHECK(!next_); |
| base::AutoLock lock(*list_lock_.Pointer()); |
| incarnation_count_for_pool_ = incarnation_counter_; |
| next_ = all_thread_data_list_head_; |
| all_thread_data_list_head_ = this; |
| } |
| |
| // static |
| ThreadData* ThreadData::first() { |
| base::AutoLock lock(*list_lock_.Pointer()); |
| return all_thread_data_list_head_; |
| } |
| |
| ThreadData* ThreadData::next() const { return next_; } |
| |
| // static |
| void ThreadData::InitializeThreadContext(const std::string& suggested_name) { |
| Initialize(); |
| ThreadData* current_thread_data = |
| reinterpret_cast<ThreadData*>(tls_index_.Get()); |
| if (current_thread_data) |
| return; // Browser tests instigate this. |
| current_thread_data = new ThreadData(suggested_name); |
| tls_index_.Set(current_thread_data); |
| } |
| |
| // static |
| ThreadData* ThreadData::Get() { |
| if (!tls_index_.initialized()) |
| return NULL; // For unittests only. |
| ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get()); |
| if (registered) |
| return registered; |
| |
| // We must be a worker thread, since we didn't pre-register. |
| ThreadData* worker_thread_data = NULL; |
| int worker_thread_number = 0; |
| { |
| base::AutoLock lock(*list_lock_.Pointer()); |
| if (first_retired_worker_) { |
| worker_thread_data = first_retired_worker_; |
| first_retired_worker_ = first_retired_worker_->next_retired_worker_; |
| worker_thread_data->next_retired_worker_ = NULL; |
| } else { |
| worker_thread_number = ++worker_thread_data_creation_count_; |
| } |
| } |
| |
| // If we can't find a previously used instance, then we have to create one. |
| if (!worker_thread_data) { |
| DCHECK_GT(worker_thread_number, 0); |
| worker_thread_data = new ThreadData(worker_thread_number); |
| } |
| DCHECK_GT(worker_thread_data->worker_thread_number_, 0); |
| |
| tls_index_.Set(worker_thread_data); |
| return worker_thread_data; |
| } |
| |
| // static |
| void ThreadData::OnThreadTermination(void* thread_data) { |
| DCHECK(thread_data); // TLS should *never* call us with a NULL. |
| // We must NOT do any allocations during this callback. There is a chance |
| // that the allocator is no longer active on this thread. |
| reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup(); |
| } |
| |
| void ThreadData::OnThreadTerminationCleanup() { |
| // The list_lock_ was created when we registered the callback, so it won't be |
| // allocated here despite the lazy reference. |
| base::AutoLock lock(*list_lock_.Pointer()); |
| if (incarnation_counter_ != incarnation_count_for_pool_) |
| return; // ThreadData was constructed in an earlier unit test. |
| ++cleanup_count_; |
| // Only worker threads need to be retired and reused. |
| if (!worker_thread_number_) { |
| return; |
| } |
| // We must NOT do any allocations during this callback. |
| // Using the simple linked lists avoids all allocations. |
| DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL)); |
| this->next_retired_worker_ = first_retired_worker_; |
| first_retired_worker_ = this; |
| } |
| |
| // static |
| void ThreadData::Snapshot(int current_profiling_phase, |
| ProcessDataSnapshot* process_data_snapshot) { |
| // Get an unchanging copy of a ThreadData list. |
| ThreadData* my_list = ThreadData::first(); |
| |
| // Gather data serially. |
| // This hackish approach *can* get some slightly corrupt tallies, as we are |
| // grabbing values without the protection of a lock, but it has the advantage |
| // of working even with threads that don't have message loops. If a user |
| // sees any strangeness, they can always just run their stats gathering a |
| // second time. |
| BirthCountMap birth_counts; |
| for (ThreadData* thread_data = my_list; thread_data; |
| thread_data = thread_data->next()) { |
| thread_data->SnapshotExecutedTasks(current_profiling_phase, |
| &process_data_snapshot->phased_snapshots, |
| &birth_counts); |
| } |
| |
| // Add births that are still active -- i.e. objects that have tallied a birth, |
| // but have not yet tallied a matching death, and hence must be either |
| // running, queued up, or being held in limbo for future posting. |
| auto* current_phase_tasks = |
| &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks; |
| for (const auto& birth_count : birth_counts) { |
| if (birth_count.second > 0) { |
| current_phase_tasks->push_back( |
| TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first), |
| DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0), |
| "Still_Alive")); |
| } |
| } |
| } |
| |
| // static |
| void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) { |
| // Get an unchanging copy of a ThreadData list. |
| ThreadData* my_list = ThreadData::first(); |
| |
| // Add snapshots for all instances of death data in all threads serially. |
| // This hackish approach *can* get some slightly corrupt tallies, as we are |
| // grabbing values without the protection of a lock, but it has the advantage |
| // of working even with threads that don't have message loops. Any corruption |
| // shouldn't cause "cascading damage" to anything else (in later phases). |
| for (ThreadData* thread_data = my_list; thread_data; |
| thread_data = thread_data->next()) { |
| thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase); |
| } |
| } |
| |
| Births* ThreadData::TallyABirth(const Location& location) { |
| BirthMap::iterator it = birth_map_.find(location); |
| Births* child; |
| if (it != birth_map_.end()) { |
| child = it->second; |
| child->RecordBirth(); |
| } else { |
| child = new Births(location, *this); // Leak this. |
| // Lock since the map may get relocated now, and other threads sometimes |
| // snapshot it (but they lock before copying it). |
| base::AutoLock lock(map_lock_); |
| birth_map_[location] = child; |
| } |
| |
| return child; |
| } |
| |
| void ThreadData::TallyADeath(const Births& births, |
| int32_t queue_duration, |
| const TaskStopwatch& stopwatch) { |
| int32_t run_duration = stopwatch.RunDurationMs(); |
| |
  // Stir in some randomness, plus add a constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;  // 2^31 - 1, a Mersenne prime.
| random_number_ += queue_duration + run_duration + kSomePrimeNumber; |
| // An address is going to have some randomness to it as well ;-). |
| random_number_ ^= |
| static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0)); |
| |
| DeathMap::iterator it = death_map_.find(&births); |
| DeathData* death_data; |
| if (it != death_map_.end()) { |
| death_data = &it->second; |
| } else { |
| base::AutoLock lock(map_lock_); // Lock as the map may get relocated now. |
| death_data = &death_map_[&births]; |
| } // Release lock ASAP. |
| death_data->RecordDeath(queue_duration, run_duration, random_number_); |
| } |
| |
| // static |
| Births* ThreadData::TallyABirthIfActive(const Location& location) { |
| if (!TrackingStatus()) |
| return NULL; |
| ThreadData* current_thread_data = Get(); |
| if (!current_thread_data) |
| return NULL; |
| return current_thread_data->TallyABirth(location); |
| } |
| |
| // static |
| void ThreadData::TallyRunOnNamedThreadIfTracking( |
| const base::TrackingInfo& completed_task, |
| const TaskStopwatch& stopwatch) { |
| // Even if we have been DEACTIVATED, we will process any pending births so |
| // that our data structures (which counted the outstanding births) remain |
| // consistent. |
| const Births* births = completed_task.birth_tally; |
| if (!births) |
| return; |
| ThreadData* current_thread_data = stopwatch.GetThreadData(); |
| if (!current_thread_data) |
| return; |
| |
| // Watch out for a race where status_ is changing, and hence one or both |
| // of start_of_run or end_of_run is zero. In that case, we didn't bother to |
| // get a time value since we "weren't tracking" and we were trying to be |
| // efficient by not calling for a genuine time value. For simplicity, we'll |
| // use a default zero duration when we can't calculate a true value. |
| TrackedTime start_of_run = stopwatch.StartTime(); |
| int32_t queue_duration = 0; |
| if (!start_of_run.is_null()) { |
| queue_duration = (start_of_run - completed_task.EffectiveTimePosted()) |
| .InMilliseconds(); |
| } |
| current_thread_data->TallyADeath(*births, queue_duration, stopwatch); |
| } |
| |
| // static |
| void ThreadData::TallyRunOnWorkerThreadIfTracking( |
| const Births* births, |
| const TrackedTime& time_posted, |
| const TaskStopwatch& stopwatch) { |
| // Even if we have been DEACTIVATED, we will process any pending births so |
| // that our data structures (which counted the outstanding births) remain |
| // consistent. |
| if (!births) |
| return; |
| |
| // TODO(jar): Support the option to coalesce all worker-thread activity under |
| // one ThreadData instance that uses locks to protect *all* access. This will |
| // reduce memory (making it provably bounded), but run incrementally slower |
| // (since we'll use locks on TallyABirth and TallyADeath). The good news is |
| // that the locks on TallyADeath will be *after* the worker thread has run, |
| // and hence nothing will be waiting for the completion (... besides some |
  // other thread that might like to run). Also, the worker threads' tasks are
| // generally longer, and hence the cost of the lock may perchance be amortized |
| // over the long task's lifetime. |
| ThreadData* current_thread_data = stopwatch.GetThreadData(); |
| if (!current_thread_data) |
| return; |
| |
| TrackedTime start_of_run = stopwatch.StartTime(); |
| int32_t queue_duration = 0; |
| if (!start_of_run.is_null()) { |
| queue_duration = (start_of_run - time_posted).InMilliseconds(); |
| } |
| current_thread_data->TallyADeath(*births, queue_duration, stopwatch); |
| } |
| |
| // static |
| void ThreadData::TallyRunInAScopedRegionIfTracking( |
| const Births* births, |
| const TaskStopwatch& stopwatch) { |
| // Even if we have been DEACTIVATED, we will process any pending births so |
| // that our data structures (which counted the outstanding births) remain |
| // consistent. |
| if (!births) |
| return; |
| |
| ThreadData* current_thread_data = stopwatch.GetThreadData(); |
| if (!current_thread_data) |
| return; |
| |
| int32_t queue_duration = 0; |
| current_thread_data->TallyADeath(*births, queue_duration, stopwatch); |
| } |
| |
| void ThreadData::SnapshotExecutedTasks( |
| int current_profiling_phase, |
| PhasedProcessDataSnapshotMap* phased_snapshots, |
| BirthCountMap* birth_counts) { |
| // Get copy of data, so that the data will not change during the iterations |
| // and processing. |
| BirthMap birth_map; |
| DeathsSnapshot deaths; |
| SnapshotMaps(current_profiling_phase, &birth_map, &deaths); |
| |
| for (const auto& birth : birth_map) { |
| (*birth_counts)[birth.second] += birth.second->birth_count(); |
| } |
| |
| for (const auto& death : deaths) { |
    (*birth_counts)[death.first] -= death.second.death_data.count;
| |
| // For the current death data, walk through all its snapshots, starting from |
| // the current one, then from the previous profiling phase etc., and for |
| // each snapshot calculate the delta between the snapshot and the previous |
| // phase, if any. Store the deltas in the result. |
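    // For example, with snapshots for phases 0, 1 and 2 (2 being current),
    // the reported data is (phase 2 - phase 1), (phase 1 - phase 0), and
    // phase 0 as-is, so each phase's activity is reported exactly once.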
| for (const DeathDataPhaseSnapshot* phase = &death.second; phase; |
| phase = phase->prev) { |
| const DeathDataSnapshot& death_data = |
| phase->prev ? phase->death_data.Delta(phase->prev->death_data) |
| : phase->death_data; |
| |
| if (death_data.count > 0) { |
| (*phased_snapshots)[phase->profiling_phase].tasks.push_back( |
| TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data, |
| thread_name())); |
| } |
| } |
| } |
| } |
| |
| // This may be called from another thread. |
| void ThreadData::SnapshotMaps(int profiling_phase, |
| BirthMap* birth_map, |
| DeathsSnapshot* deaths) { |
| base::AutoLock lock(map_lock_); |
| |
| for (const auto& birth : birth_map_) |
| (*birth_map)[birth.first] = birth.second; |
| |
| for (const auto& death : death_map_) { |
| deaths->push_back(std::make_pair( |
| death.first, |
| DeathDataPhaseSnapshot(profiling_phase, death.second.count(), |
| death.second.run_duration_sum(), |
| death.second.run_duration_max(), |
| death.second.run_duration_sample(), |
| death.second.queue_duration_sum(), |
| death.second.queue_duration_max(), |
| death.second.queue_duration_sample(), |
| death.second.last_phase_snapshot()))); |
| } |
| } |
| |
| void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) { |
| base::AutoLock lock(map_lock_); |
| |
| for (auto& death : death_map_) { |
| death.second.OnProfilingPhaseCompleted(profiling_phase); |
| } |
| } |
| |
| void ThreadData::Initialize() { |
| if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED) |
| return; // Someone else did the initialization. |
| // Due to racy lazy initialization in tests, we'll need to recheck status_ |
| // after we acquire the lock. |
| |
| // Ensure that we don't double initialize tls. We are called when single |
| // threaded in the product, but some tests may be racy and lazy about our |
| // initialization. |
| base::AutoLock lock(*list_lock_.Pointer()); |
| if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED) |
| return; // Someone raced in here and beat us. |
| |
| // Perform the "real" TLS initialization now, and leave it intact through |
| // process termination. |
| if (!tls_index_.initialized()) { // Testing may have initialized this. |
| DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED); |
| tls_index_.Initialize(&ThreadData::OnThreadTermination); |
| DCHECK(tls_index_.initialized()); |
| } else { |
    // TLS was initialized for us earlier.
| DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS); |
| } |
| |
| // Incarnation counter is only significant to testing, as it otherwise will |
| // never again change in this process. |
| ++incarnation_counter_; |
| |
| // The lock is not critical for setting status_, but it doesn't hurt. It also |
| // ensures that if we have a racy initialization, that we'll bail as soon as |
| // we get the lock earlier in this method. |
| base::subtle::Release_Store(&status_, kInitialStartupState); |
| DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED); |
| } |
| |
| // static |
| void ThreadData::InitializeAndSetTrackingStatus(Status status) { |
| DCHECK_GE(status, DEACTIVATED); |
| DCHECK_LE(status, PROFILING_ACTIVE); |
| |
| Initialize(); // No-op if already initialized. |
| |
| if (status > DEACTIVATED) |
| status = PROFILING_ACTIVE; |
| base::subtle::Release_Store(&status_, status); |
| } |
| |
| // static |
| ThreadData::Status ThreadData::status() { |
| return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_)); |
| } |
| |
| // static |
| bool ThreadData::TrackingStatus() { |
| return base::subtle::Acquire_Load(&status_) > DEACTIVATED; |
| } |
| |
| // static |
| void ThreadData::EnableProfilerTiming() { |
| base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING); |
| } |
| |
| // static |
| TrackedTime ThreadData::Now() { |
| if (now_function_for_testing_) |
| return TrackedTime::FromMilliseconds((*now_function_for_testing_)()); |
| if (IsProfilerTimingEnabled() && TrackingStatus()) |
| return TrackedTime::Now(); |
| return TrackedTime(); // Super fast when disabled, or not compiled. |
| } |
| |
| // static |
| void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) { |
| base::AutoLock lock(*list_lock_.Pointer()); |
| if (worker_thread_data_creation_count_ == 0) |
| return; // We haven't really run much, and couldn't have leaked. |
| |
| // TODO(jar): until this is working on XP, don't run the real test. |
| #if 0 |
  // Verify that we've at least shut down/cleaned up the major named threads. The
| // caller should tell us how many thread shutdowns should have taken place by |
| // now. |
| CHECK_GT(cleanup_count_, major_threads_shutdown_count); |
| #endif |
| } |
| |
| // static |
| void ThreadData::ShutdownSingleThreadedCleanup(bool leak) { |
| // This is only called from test code, where we need to cleanup so that |
| // additional tests can be run. |
| // We must be single threaded... but be careful anyway. |
| InitializeAndSetTrackingStatus(DEACTIVATED); |
| |
| ThreadData* thread_data_list; |
| { |
| base::AutoLock lock(*list_lock_.Pointer()); |
| thread_data_list = all_thread_data_list_head_; |
| all_thread_data_list_head_ = NULL; |
| ++incarnation_counter_; |
| // To be clean, break apart the retired worker list (though we leak them). |
| while (first_retired_worker_) { |
| ThreadData* worker = first_retired_worker_; |
| CHECK_GT(worker->worker_thread_number_, 0); |
| first_retired_worker_ = worker->next_retired_worker_; |
| worker->next_retired_worker_ = NULL; |
| } |
| } |
| |
  // Put most global statics back in pristine shape.
| worker_thread_data_creation_count_ = 0; |
| cleanup_count_ = 0; |
| tls_index_.Set(NULL); |
| // Almost UNINITIALIZED. |
| base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS); |
| |
| // To avoid any chance of racing in unit tests, which is the only place we |
| // call this function, we may sometimes leak all the data structures we |
| // recovered, as they may still be in use on threads from prior tests! |
| if (leak) { |
| ThreadData* thread_data = thread_data_list; |
| while (thread_data) { |
| ANNOTATE_LEAKING_OBJECT_PTR(thread_data); |
| thread_data = thread_data->next(); |
| } |
| return; |
| } |
| |
| // When we want to cleanup (on a single thread), here is what we do. |
| |
| // Do actual recursive delete in all ThreadData instances. |
| while (thread_data_list) { |
| ThreadData* next_thread_data = thread_data_list; |
| thread_data_list = thread_data_list->next(); |
| |
| for (BirthMap::iterator it = next_thread_data->birth_map_.begin(); |
| next_thread_data->birth_map_.end() != it; ++it) |
| delete it->second; // Delete the Birth Records. |
| delete next_thread_data; // Includes all Death Records. |
| } |
| } |
| |
| //------------------------------------------------------------------------------ |
| TaskStopwatch::TaskStopwatch() |
| : wallclock_duration_ms_(0), |
| current_thread_data_(NULL), |
| excluded_duration_ms_(0), |
| parent_(NULL) { |
| #if DCHECK_IS_ON() |
| state_ = CREATED; |
| child_ = NULL; |
| #endif |
| } |
| |
| TaskStopwatch::~TaskStopwatch() { |
| #if DCHECK_IS_ON() |
| DCHECK(state_ != RUNNING); |
| DCHECK(child_ == NULL); |
| #endif |
| } |
| |
| void TaskStopwatch::Start() { |
| #if DCHECK_IS_ON() |
| DCHECK(state_ == CREATED); |
| state_ = RUNNING; |
| #endif |
| |
| start_time_ = ThreadData::Now(); |
| |
| current_thread_data_ = ThreadData::Get(); |
| if (!current_thread_data_) |
| return; |
| |
| parent_ = current_thread_data_->current_stopwatch_; |
| #if DCHECK_IS_ON() |
| if (parent_) { |
| DCHECK(parent_->state_ == RUNNING); |
| DCHECK(parent_->child_ == NULL); |
| parent_->child_ = this; |
| } |
| #endif |
| current_thread_data_->current_stopwatch_ = this; |
| } |
| |
| void TaskStopwatch::Stop() { |
| const TrackedTime end_time = ThreadData::Now(); |
| #if DCHECK_IS_ON() |
| DCHECK(state_ == RUNNING); |
| state_ = STOPPED; |
| DCHECK(child_ == NULL); |
| #endif |
| |
| if (!start_time_.is_null() && !end_time.is_null()) { |
| wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds(); |
| } |
| |
| if (!current_thread_data_) |
| return; |
| |
| DCHECK(current_thread_data_->current_stopwatch_ == this); |
| current_thread_data_->current_stopwatch_ = parent_; |
| if (!parent_) |
| return; |
| |
| #if DCHECK_IS_ON() |
| DCHECK(parent_->state_ == RUNNING); |
| DCHECK(parent_->child_ == this); |
| parent_->child_ = NULL; |
| #endif |
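  // Credit this stopwatch's wallclock time to the parent's excluded duration,
  // so the parent's RunDurationMs() reports only time spent outside nested
  // stopwatches.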
| parent_->excluded_duration_ms_ += wallclock_duration_ms_; |
| parent_ = NULL; |
| } |
| |
| TrackedTime TaskStopwatch::StartTime() const { |
| #if DCHECK_IS_ON() |
| DCHECK(state_ != CREATED); |
| #endif |
| |
| return start_time_; |
| } |
| |
| int32_t TaskStopwatch::RunDurationMs() const { |
| #if DCHECK_IS_ON() |
| DCHECK(state_ == STOPPED); |
| #endif |
| |
| return wallclock_duration_ms_ - excluded_duration_ms_; |
| } |
| |
| ThreadData* TaskStopwatch::GetThreadData() const { |
| #if DCHECK_IS_ON() |
| DCHECK(state_ != CREATED); |
| #endif |
| |
| return current_thread_data_; |
| } |
| |
| //------------------------------------------------------------------------------ |
| // DeathDataPhaseSnapshot |
| |
| DeathDataPhaseSnapshot::DeathDataPhaseSnapshot( |
| int profiling_phase, |
| int count, |
| int32_t run_duration_sum, |
| int32_t run_duration_max, |
| int32_t run_duration_sample, |
| int32_t queue_duration_sum, |
| int32_t queue_duration_max, |
| int32_t queue_duration_sample, |
| const DeathDataPhaseSnapshot* prev) |
| : profiling_phase(profiling_phase), |
| death_data(count, |
| run_duration_sum, |
| run_duration_max, |
| run_duration_sample, |
| queue_duration_sum, |
| queue_duration_max, |
| queue_duration_sample), |
| prev(prev) {} |
| |
| //------------------------------------------------------------------------------ |
| // TaskSnapshot |
| |
| TaskSnapshot::TaskSnapshot() { |
| } |
| |
| TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth, |
| const DeathDataSnapshot& death_data, |
| const std::string& death_thread_name) |
| : birth(birth), |
| death_data(death_data), |
| death_thread_name(death_thread_name) { |
| } |
| |
| TaskSnapshot::~TaskSnapshot() { |
| } |
| |
| //------------------------------------------------------------------------------ |
| // ProcessDataPhaseSnapshot |
| |
| ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() { |
| } |
| |
| ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() { |
| } |
| |
| //------------------------------------------------------------------------------ |
// ProcessDataSnapshot
| |
| ProcessDataSnapshot::ProcessDataSnapshot() |
| #if !defined(OS_NACL) |
| : process_id(base::GetCurrentProcId()) { |
| #else |
| : process_id(base::kNullProcessId) { |
| #endif |
| } |
| |
| ProcessDataSnapshot::~ProcessDataSnapshot() { |
| } |
| |
| } // namespace tracked_objects |