blob: 4534e6e58fd7f0a71f29a39152a711713f4d98ad [file] [log] [blame]
Daniel Eratb8cf9492015-07-06 13:18:13 -06001// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/tracked_objects.h"
6
7#include <limits.h>
8#include <stdlib.h>
9
10#include "base/atomicops.h"
11#include "base/base_switches.h"
12#include "base/command_line.h"
13#include "base/compiler_specific.h"
Daniel Eratb8cf9492015-07-06 13:18:13 -060014#include "base/logging.h"
15#include "base/process/process_handle.h"
16#include "base/profiler/alternate_timer.h"
17#include "base/strings/stringprintf.h"
Daniel Eratb8cf9492015-07-06 13:18:13 -060018#include "base/tracking_info.h"
Christopher Wiley0124f202015-12-19 19:13:08 -080019#include "third_party/valgrind/memcheck.h"
Daniel Eratb8cf9492015-07-06 13:18:13 -060020
21using base::TimeDelta;
22
23namespace base {
24class TimeDelta;
25}
26
27namespace tracked_objects {
28
29namespace {
30// When ThreadData is first initialized, should we start in an ACTIVE state to
31// record all of the startup-time tasks, or should we start up DEACTIVATED, so
32// that we only record after parsing the command line flag --enable-tracking.
33// Note that the flag may force either state, so this really controls only the
34// period of time up until that flag is parsed. If there is no flag seen, then
35// this state may prevail for much or all of the process lifetime.
36const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
37
38// Control whether an alternate time source (Now() function) is supported by
39// the ThreadData class. This compile time flag should be set to true if we
40// want other modules (such as a memory allocator, or a thread-specific CPU time
41// clock) to be able to provide a thread-specific Now() function. Without this
42// compile-time flag, the code will only support the wall-clock time. This flag
43// can be flipped to efficiently disable this path (if there is a performance
44// problem with its presence).
45static const bool kAllowAlternateTimeSourceHandling = true;
46
47// Possible states of the profiler timing enabledness.
48enum {
49 UNDEFINED_TIMING,
50 ENABLED_TIMING,
51 DISABLED_TIMING,
52};
53
54// State of the profiler timing enabledness.
55base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
56
57// Returns whether profiler timing is enabled. The default is true, but this
58// may be overridden by a command-line flag. Some platforms may
59// programmatically set this command-line flag to the "off" value if it's not
60// specified.
61// This in turn can be overridden by explicitly calling
62// ThreadData::EnableProfilerTiming, say, based on a field trial.
63inline bool IsProfilerTimingEnabled() {
64 // Reading |g_profiler_timing_enabled| is done without barrier because
65 // multiple initialization is not an issue while the barrier can be relatively
66 // costly given that this method is sometimes called in a tight loop.
67 base::subtle::Atomic32 current_timing_enabled =
68 base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
69 if (current_timing_enabled == UNDEFINED_TIMING) {
70 if (!base::CommandLine::InitializedForCurrentProcess())
71 return true;
72 current_timing_enabled =
73 (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
74 switches::kProfilerTiming) ==
75 switches::kProfilerTimingDisabledValue)
76 ? DISABLED_TIMING
77 : ENABLED_TIMING;
78 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
79 current_timing_enabled);
80 }
81 return current_timing_enabled == ENABLED_TIMING;
82}
83
84} // namespace
85
86//------------------------------------------------------------------------------
87// DeathData tallies durations when a death takes place.
88
89DeathData::DeathData()
90 : count_(0),
91 sample_probability_count_(0),
92 run_duration_sum_(0),
93 queue_duration_sum_(0),
94 run_duration_max_(0),
95 queue_duration_max_(0),
96 run_duration_sample_(0),
97 queue_duration_sample_(0),
98 last_phase_snapshot_(nullptr) {
99}
100
101DeathData::DeathData(const DeathData& other)
102 : count_(other.count_),
103 sample_probability_count_(other.sample_probability_count_),
104 run_duration_sum_(other.run_duration_sum_),
105 queue_duration_sum_(other.queue_duration_sum_),
106 run_duration_max_(other.run_duration_max_),
107 queue_duration_max_(other.queue_duration_max_),
108 run_duration_sample_(other.run_duration_sample_),
109 queue_duration_sample_(other.queue_duration_sample_),
110 last_phase_snapshot_(nullptr) {
111 // This constructor will be used by std::map when adding new DeathData values
112 // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
113 // need to worry about ownership transfer.
114 DCHECK(other.last_phase_snapshot_ == nullptr);
115}
116
117DeathData::~DeathData() {
118 while (last_phase_snapshot_) {
119 const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
120 last_phase_snapshot_ = snapshot->prev;
121 delete snapshot;
122 }
123}
124
125// TODO(jar): I need to see if this macro to optimize branching is worth using.
126//
127// This macro has no branching, so it is surely fast, and is equivalent to:
128// if (assign_it)
129// target = source;
130// We use a macro rather than a template to force this to inline.
131// Related code for calculating max is discussed on the web.
132#define CONDITIONAL_ASSIGN(assign_it, target, source) \
133 ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it))
134
135void DeathData::RecordDeath(const int32 queue_duration,
136 const int32 run_duration,
137 const uint32 random_number) {
138 // We'll just clamp at INT_MAX, but we should note this in the UI as such.
139 if (count_ < INT_MAX)
140 ++count_;
141
142 int sample_probability_count = sample_probability_count_;
143 if (sample_probability_count < INT_MAX)
144 ++sample_probability_count;
145 sample_probability_count_ = sample_probability_count;
146
147 queue_duration_sum_ += queue_duration;
148 run_duration_sum_ += run_duration;
149
150 if (queue_duration_max_ < queue_duration)
151 queue_duration_max_ = queue_duration;
152 if (run_duration_max_ < run_duration)
153 run_duration_max_ = run_duration;
154
155 // Take a uniformly distributed sample over all durations ever supplied during
156 // the current profiling phase.
157 // The probability that we (instead) use this new sample is
158 // 1/sample_probability_count_. This results in a completely uniform selection
159 // of the sample (at least when we don't clamp sample_probability_count_...
160 // but that should be inconsequentially likely). We ignore the fact that we
161 // correlated our selection of a sample to the run and queue times (i.e., we
162 // used them to generate random_number).
163 CHECK_GT(sample_probability_count, 0);
164 if (0 == (random_number % sample_probability_count)) {
165 queue_duration_sample_ = queue_duration;
166 run_duration_sample_ = run_duration;
167 }
168}
169
170void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
171 // Snapshotting and storing current state.
172 last_phase_snapshot_ = new DeathDataPhaseSnapshot(
173 profiling_phase, count_, run_duration_sum_, run_duration_max_,
174 run_duration_sample_, queue_duration_sum_, queue_duration_max_,
175 queue_duration_sample_, last_phase_snapshot_);
176
177 // Not touching fields for which a delta can be computed by comparing with a
178 // snapshot from the previous phase. Resetting other fields. Sample values
179 // will be reset upon next death recording because sample_probability_count_
180 // is set to 0.
181 // We avoid resetting to 0 in favor of deltas whenever possible. The reason
182 // is that for incrementable fields, resetting to 0 from the snapshot thread
183 // potentially in parallel with incrementing in the death thread may result in
184 // significant data corruption that has a potential to grow with time. Not
185 // resetting incrementable fields and using deltas will cause any
186 // off-by-little corruptions to be likely fixed at the next snapshot.
187 // The max values are not incrementable, and cannot be deduced using deltas
188 // for a given phase. Hence, we have to reset them to 0. But the potential
189 // damage is limited to getting the previous phase's max to apply for the next
190 // phase, and the error doesn't have a potential to keep growing with new
191 // resets.
192 // sample_probability_count_ is incrementable, but must be reset to 0 at the
193 // phase end, so that we start a new uniformly randomized sample selection
194 // after the reset. Corruptions due to race conditions are possible, but the
195 // damage is limited to selecting a wrong sample, which is not something that
196 // can cause accumulating or cascading effects.
197 // If there were no corruptions caused by race conditions, we never send a
198 // sample for the previous phase in the next phase's snapshot because
199 // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
200 sample_probability_count_ = 0;
201 run_duration_max_ = 0;
202 queue_duration_max_ = 0;
203}
204
205//------------------------------------------------------------------------------
206DeathDataSnapshot::DeathDataSnapshot()
207 : count(-1),
208 run_duration_sum(-1),
209 run_duration_max(-1),
210 run_duration_sample(-1),
211 queue_duration_sum(-1),
212 queue_duration_max(-1),
213 queue_duration_sample(-1) {
214}
215
216DeathDataSnapshot::DeathDataSnapshot(int count,
217 int32 run_duration_sum,
218 int32 run_duration_max,
219 int32 run_duration_sample,
220 int32 queue_duration_sum,
221 int32 queue_duration_max,
222 int32 queue_duration_sample)
223 : count(count),
224 run_duration_sum(run_duration_sum),
225 run_duration_max(run_duration_max),
226 run_duration_sample(run_duration_sample),
227 queue_duration_sum(queue_duration_sum),
228 queue_duration_max(queue_duration_max),
229 queue_duration_sample(queue_duration_sample) {
230}
231
232DeathDataSnapshot::~DeathDataSnapshot() {
233}
234
235DeathDataSnapshot DeathDataSnapshot::Delta(
236 const DeathDataSnapshot& older) const {
237 return DeathDataSnapshot(count - older.count,
238 run_duration_sum - older.run_duration_sum,
239 run_duration_max, run_duration_sample,
240 queue_duration_sum - older.queue_duration_sum,
241 queue_duration_max, queue_duration_sample);
242}
243
244//------------------------------------------------------------------------------
245BirthOnThread::BirthOnThread(const Location& location,
246 const ThreadData& current)
247 : location_(location),
248 birth_thread_(&current) {
249}
250
251//------------------------------------------------------------------------------
252BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
253}
254
255BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
256 : location(birth.location()),
257 thread_name(birth.birth_thread()->thread_name()) {
258}
259
260BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
261}
262
263//------------------------------------------------------------------------------
264Births::Births(const Location& location, const ThreadData& current)
265 : BirthOnThread(location, current),
266 birth_count_(1) { }
267
268int Births::birth_count() const { return birth_count_; }
269
270void Births::RecordBirth() { ++birth_count_; }
271
272//------------------------------------------------------------------------------
273// ThreadData maintains the central data for all births and deaths on a single
274// thread.
275
276// TODO(jar): We should pull all these static vars together, into a struct, and
277// optimize layout so that we benefit from locality of reference during accesses
278// to them.
279
280// static
281NowFunction* ThreadData::now_function_ = NULL;
282
283// static
284bool ThreadData::now_function_is_time_ = false;
285
286// A TLS slot which points to the ThreadData instance for the current thread.
287// We do a fake initialization here (zeroing out data), and then the real
288// in-place construction happens when we call tls_index_.Initialize().
289// static
290base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
291
292// static
293int ThreadData::worker_thread_data_creation_count_ = 0;
294
295// static
296int ThreadData::cleanup_count_ = 0;
297
298// static
299int ThreadData::incarnation_counter_ = 0;
300
301// static
302ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
303
304// static
305ThreadData* ThreadData::first_retired_worker_ = NULL;
306
307// static
308base::LazyInstance<base::Lock>::Leaky
309 ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
310
311// static
312ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;
313
314ThreadData::ThreadData(const std::string& suggested_name)
315 : next_(NULL),
316 next_retired_worker_(NULL),
317 worker_thread_number_(0),
318 incarnation_count_for_pool_(-1),
319 current_stopwatch_(NULL) {
320 DCHECK_GE(suggested_name.size(), 0u);
321 thread_name_ = suggested_name;
322 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
323}
324
325ThreadData::ThreadData(int thread_number)
326 : next_(NULL),
327 next_retired_worker_(NULL),
328 worker_thread_number_(thread_number),
329 incarnation_count_for_pool_(-1),
330 current_stopwatch_(NULL) {
331 CHECK_GT(thread_number, 0);
332 base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
333 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
334}
335
336ThreadData::~ThreadData() {
337}
338
339void ThreadData::PushToHeadOfList() {
340 // Toss in a hint of randomness (atop the uniniitalized value).
341 (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
342 sizeof(random_number_));
343 MSAN_UNPOISON(&random_number_, sizeof(random_number_));
344 random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0));
345 random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
346
347 DCHECK(!next_);
348 base::AutoLock lock(*list_lock_.Pointer());
349 incarnation_count_for_pool_ = incarnation_counter_;
350 next_ = all_thread_data_list_head_;
351 all_thread_data_list_head_ = this;
352}
353
354// static
355ThreadData* ThreadData::first() {
356 base::AutoLock lock(*list_lock_.Pointer());
357 return all_thread_data_list_head_;
358}
359
360ThreadData* ThreadData::next() const { return next_; }
361
362// static
363void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
364 Initialize();
365 ThreadData* current_thread_data =
366 reinterpret_cast<ThreadData*>(tls_index_.Get());
367 if (current_thread_data)
368 return; // Browser tests instigate this.
369 current_thread_data = new ThreadData(suggested_name);
370 tls_index_.Set(current_thread_data);
371}
372
373// static
374ThreadData* ThreadData::Get() {
375 if (!tls_index_.initialized())
376 return NULL; // For unittests only.
377 ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
378 if (registered)
379 return registered;
380
381 // We must be a worker thread, since we didn't pre-register.
382 ThreadData* worker_thread_data = NULL;
383 int worker_thread_number = 0;
384 {
385 base::AutoLock lock(*list_lock_.Pointer());
386 if (first_retired_worker_) {
387 worker_thread_data = first_retired_worker_;
388 first_retired_worker_ = first_retired_worker_->next_retired_worker_;
389 worker_thread_data->next_retired_worker_ = NULL;
390 } else {
391 worker_thread_number = ++worker_thread_data_creation_count_;
392 }
393 }
394
395 // If we can't find a previously used instance, then we have to create one.
396 if (!worker_thread_data) {
397 DCHECK_GT(worker_thread_number, 0);
398 worker_thread_data = new ThreadData(worker_thread_number);
399 }
400 DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
401
402 tls_index_.Set(worker_thread_data);
403 return worker_thread_data;
404}
405
406// static
407void ThreadData::OnThreadTermination(void* thread_data) {
408 DCHECK(thread_data); // TLS should *never* call us with a NULL.
409 // We must NOT do any allocations during this callback. There is a chance
410 // that the allocator is no longer active on this thread.
411 reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
412}
413
414void ThreadData::OnThreadTerminationCleanup() {
415 // The list_lock_ was created when we registered the callback, so it won't be
416 // allocated here despite the lazy reference.
417 base::AutoLock lock(*list_lock_.Pointer());
418 if (incarnation_counter_ != incarnation_count_for_pool_)
419 return; // ThreadData was constructed in an earlier unit test.
420 ++cleanup_count_;
421 // Only worker threads need to be retired and reused.
422 if (!worker_thread_number_) {
423 return;
424 }
425 // We must NOT do any allocations during this callback.
426 // Using the simple linked lists avoids all allocations.
427 DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
428 this->next_retired_worker_ = first_retired_worker_;
429 first_retired_worker_ = this;
430}
431
432// static
433void ThreadData::Snapshot(int current_profiling_phase,
434 ProcessDataSnapshot* process_data_snapshot) {
435 // Get an unchanging copy of a ThreadData list.
436 ThreadData* my_list = ThreadData::first();
437
438 // Gather data serially.
439 // This hackish approach *can* get some slightly corrupt tallies, as we are
440 // grabbing values without the protection of a lock, but it has the advantage
441 // of working even with threads that don't have message loops. If a user
442 // sees any strangeness, they can always just run their stats gathering a
443 // second time.
444 BirthCountMap birth_counts;
445 for (ThreadData* thread_data = my_list; thread_data;
446 thread_data = thread_data->next()) {
447 thread_data->SnapshotExecutedTasks(current_profiling_phase,
448 &process_data_snapshot->phased_snapshots,
449 &birth_counts);
450 }
451
452 // Add births that are still active -- i.e. objects that have tallied a birth,
453 // but have not yet tallied a matching death, and hence must be either
454 // running, queued up, or being held in limbo for future posting.
455 auto* current_phase_tasks =
456 &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
457 for (const auto& birth_count : birth_counts) {
458 if (birth_count.second > 0) {
459 current_phase_tasks->push_back(
460 TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
461 DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
462 "Still_Alive"));
463 }
464 }
465}
466
467// static
468void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
469 // Get an unchanging copy of a ThreadData list.
470 ThreadData* my_list = ThreadData::first();
471
472 // Add snapshots for all instances of death data in all threads serially.
473 // This hackish approach *can* get some slightly corrupt tallies, as we are
474 // grabbing values without the protection of a lock, but it has the advantage
475 // of working even with threads that don't have message loops. Any corruption
476 // shouldn't cause "cascading damage" to anything else (in later phases).
477 for (ThreadData* thread_data = my_list; thread_data;
478 thread_data = thread_data->next()) {
479 thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
480 }
481}
482
483Births* ThreadData::TallyABirth(const Location& location) {
484 BirthMap::iterator it = birth_map_.find(location);
485 Births* child;
486 if (it != birth_map_.end()) {
487 child = it->second;
488 child->RecordBirth();
489 } else {
490 child = new Births(location, *this); // Leak this.
491 // Lock since the map may get relocated now, and other threads sometimes
492 // snapshot it (but they lock before copying it).
493 base::AutoLock lock(map_lock_);
494 birth_map_[location] = child;
495 }
496
497 return child;
498}
499
500void ThreadData::TallyADeath(const Births& births,
501 int32 queue_duration,
502 const TaskStopwatch& stopwatch) {
503 int32 run_duration = stopwatch.RunDurationMs();
504
505 // Stir in some randomness, plus add constant in case durations are zero.
506 const uint32 kSomePrimeNumber = 2147483647;
507 random_number_ += queue_duration + run_duration + kSomePrimeNumber;
508 // An address is going to have some randomness to it as well ;-).
509 random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0));
510
511 // We don't have queue durations without OS timer. OS timer is automatically
512 // used for task-post-timing, so the use of an alternate timer implies all
513 // queue times are invalid, unless it was explicitly said that we can trust
514 // the alternate timer.
515 if (kAllowAlternateTimeSourceHandling &&
516 now_function_ &&
517 !now_function_is_time_) {
518 queue_duration = 0;
519 }
520
521 DeathMap::iterator it = death_map_.find(&births);
522 DeathData* death_data;
523 if (it != death_map_.end()) {
524 death_data = &it->second;
525 } else {
526 base::AutoLock lock(map_lock_); // Lock as the map may get relocated now.
527 death_data = &death_map_[&births];
528 } // Release lock ASAP.
529 death_data->RecordDeath(queue_duration, run_duration, random_number_);
530}
531
532// static
533Births* ThreadData::TallyABirthIfActive(const Location& location) {
534 if (!TrackingStatus())
535 return NULL;
536 ThreadData* current_thread_data = Get();
537 if (!current_thread_data)
538 return NULL;
539 return current_thread_data->TallyABirth(location);
540}
541
542// static
543void ThreadData::TallyRunOnNamedThreadIfTracking(
544 const base::TrackingInfo& completed_task,
545 const TaskStopwatch& stopwatch) {
546 // Even if we have been DEACTIVATED, we will process any pending births so
547 // that our data structures (which counted the outstanding births) remain
548 // consistent.
549 const Births* births = completed_task.birth_tally;
550 if (!births)
551 return;
552 ThreadData* current_thread_data = stopwatch.GetThreadData();
553 if (!current_thread_data)
554 return;
555
556 // Watch out for a race where status_ is changing, and hence one or both
557 // of start_of_run or end_of_run is zero. In that case, we didn't bother to
558 // get a time value since we "weren't tracking" and we were trying to be
559 // efficient by not calling for a genuine time value. For simplicity, we'll
560 // use a default zero duration when we can't calculate a true value.
561 TrackedTime start_of_run = stopwatch.StartTime();
562 int32 queue_duration = 0;
563 if (!start_of_run.is_null()) {
564 queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
565 .InMilliseconds();
566 }
567 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
568}
569
570// static
571void ThreadData::TallyRunOnWorkerThreadIfTracking(
572 const Births* births,
573 const TrackedTime& time_posted,
574 const TaskStopwatch& stopwatch) {
575 // Even if we have been DEACTIVATED, we will process any pending births so
576 // that our data structures (which counted the outstanding births) remain
577 // consistent.
578 if (!births)
579 return;
580
581 // TODO(jar): Support the option to coalesce all worker-thread activity under
582 // one ThreadData instance that uses locks to protect *all* access. This will
583 // reduce memory (making it provably bounded), but run incrementally slower
584 // (since we'll use locks on TallyABirth and TallyADeath). The good news is
585 // that the locks on TallyADeath will be *after* the worker thread has run,
586 // and hence nothing will be waiting for the completion (... besides some
587 // other thread that might like to run). Also, the worker threads tasks are
588 // generally longer, and hence the cost of the lock may perchance be amortized
589 // over the long task's lifetime.
590 ThreadData* current_thread_data = stopwatch.GetThreadData();
591 if (!current_thread_data)
592 return;
593
594 TrackedTime start_of_run = stopwatch.StartTime();
595 int32 queue_duration = 0;
596 if (!start_of_run.is_null()) {
597 queue_duration = (start_of_run - time_posted).InMilliseconds();
598 }
599 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
600}
601
602// static
603void ThreadData::TallyRunInAScopedRegionIfTracking(
604 const Births* births,
605 const TaskStopwatch& stopwatch) {
606 // Even if we have been DEACTIVATED, we will process any pending births so
607 // that our data structures (which counted the outstanding births) remain
608 // consistent.
609 if (!births)
610 return;
611
612 ThreadData* current_thread_data = stopwatch.GetThreadData();
613 if (!current_thread_data)
614 return;
615
616 int32 queue_duration = 0;
617 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
618}
619
620void ThreadData::SnapshotExecutedTasks(
621 int current_profiling_phase,
622 PhasedProcessDataSnapshotMap* phased_snapshots,
623 BirthCountMap* birth_counts) {
624 // Get copy of data, so that the data will not change during the iterations
625 // and processing.
626 BirthMap birth_map;
627 DeathsSnapshot deaths;
628 SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
629
630 for (const auto& birth : birth_map) {
631 (*birth_counts)[birth.second] += birth.second->birth_count();
632 }
633
634 for (const auto& death : deaths) {
635 (*birth_counts)[death.first] -= death.first->birth_count();
636
637 // For the current death data, walk through all its snapshots, starting from
638 // the current one, then from the previous profiling phase etc., and for
639 // each snapshot calculate the delta between the snapshot and the previous
640 // phase, if any. Store the deltas in the result.
641 for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
642 phase = phase->prev) {
643 const DeathDataSnapshot& death_data =
644 phase->prev ? phase->death_data.Delta(phase->prev->death_data)
645 : phase->death_data;
646
647 if (death_data.count > 0) {
648 (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
649 TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
650 thread_name()));
651 }
652 }
653 }
654}
655
656// This may be called from another thread.
657void ThreadData::SnapshotMaps(int profiling_phase,
658 BirthMap* birth_map,
659 DeathsSnapshot* deaths) {
660 base::AutoLock lock(map_lock_);
661
662 for (const auto& birth : birth_map_)
663 (*birth_map)[birth.first] = birth.second;
664
665 for (const auto& death : death_map_) {
666 deaths->push_back(std::make_pair(
667 death.first,
668 DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
669 death.second.run_duration_sum(),
670 death.second.run_duration_max(),
671 death.second.run_duration_sample(),
672 death.second.queue_duration_sum(),
673 death.second.queue_duration_max(),
674 death.second.queue_duration_sample(),
675 death.second.last_phase_snapshot())));
676 }
677}
678
679void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
680 base::AutoLock lock(map_lock_);
681
682 for (auto& death : death_map_) {
683 death.second.OnProfilingPhaseCompleted(profiling_phase);
684 }
685}
686
687static void OptionallyInitializeAlternateTimer() {
688 NowFunction* alternate_time_source = GetAlternateTimeSource();
689 if (alternate_time_source)
690 ThreadData::SetAlternateTimeSource(alternate_time_source);
691}
692
693void ThreadData::Initialize() {
694 if (status_ >= DEACTIVATED)
695 return; // Someone else did the initialization.
696 // Due to racy lazy initialization in tests, we'll need to recheck status_
697 // after we acquire the lock.
698
699 // Ensure that we don't double initialize tls. We are called when single
700 // threaded in the product, but some tests may be racy and lazy about our
701 // initialization.
702 base::AutoLock lock(*list_lock_.Pointer());
703 if (status_ >= DEACTIVATED)
704 return; // Someone raced in here and beat us.
705
706 // Put an alternate timer in place if the environment calls for it, such as
707 // for tracking TCMalloc allocations. This insertion is idempotent, so we
708 // don't mind if there is a race, and we'd prefer not to be in a lock while
709 // doing this work.
710 if (kAllowAlternateTimeSourceHandling)
711 OptionallyInitializeAlternateTimer();
712
713 // Perform the "real" TLS initialization now, and leave it intact through
714 // process termination.
715 if (!tls_index_.initialized()) { // Testing may have initialized this.
716 DCHECK_EQ(status_, UNINITIALIZED);
717 tls_index_.Initialize(&ThreadData::OnThreadTermination);
718 DCHECK(tls_index_.initialized());
719 } else {
720 // TLS was initialzed for us earlier.
721 DCHECK_EQ(status_, DORMANT_DURING_TESTS);
722 }
723
724 // Incarnation counter is only significant to testing, as it otherwise will
725 // never again change in this process.
726 ++incarnation_counter_;
727
728 // The lock is not critical for setting status_, but it doesn't hurt. It also
729 // ensures that if we have a racy initialization, that we'll bail as soon as
730 // we get the lock earlier in this method.
731 status_ = kInitialStartupState;
732 DCHECK(status_ != UNINITIALIZED);
733}
734
735// static
736void ThreadData::InitializeAndSetTrackingStatus(Status status) {
737 DCHECK_GE(status, DEACTIVATED);
738 DCHECK_LE(status, PROFILING_ACTIVE);
739
740 Initialize(); // No-op if already initialized.
741
742 if (status > DEACTIVATED)
743 status = PROFILING_ACTIVE;
744 status_ = status;
745}
746
747// static
748ThreadData::Status ThreadData::status() {
749 return status_;
750}
751
752// static
753bool ThreadData::TrackingStatus() {
754 return status_ > DEACTIVATED;
755}
756
757// static
758void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
759 DCHECK(now_function);
760 if (kAllowAlternateTimeSourceHandling)
761 now_function_ = now_function;
762}
763
764// static
765void ThreadData::EnableProfilerTiming() {
766 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
767}
768
769// static
770TrackedTime ThreadData::Now() {
771 if (kAllowAlternateTimeSourceHandling && now_function_)
772 return TrackedTime::FromMilliseconds((*now_function_)());
773 if (IsProfilerTimingEnabled() && TrackingStatus())
774 return TrackedTime::Now();
775 return TrackedTime(); // Super fast when disabled, or not compiled.
776}
777
778// static
779void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
780 base::AutoLock lock(*list_lock_.Pointer());
781 if (worker_thread_data_creation_count_ == 0)
782 return; // We haven't really run much, and couldn't have leaked.
783
784 // TODO(jar): until this is working on XP, don't run the real test.
785#if 0
786 // Verify that we've at least shutdown/cleanup the major namesd threads. The
787 // caller should tell us how many thread shutdowns should have taken place by
788 // now.
789 CHECK_GT(cleanup_count_, major_threads_shutdown_count);
790#endif
791}
792
793// static
794void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
795 // This is only called from test code, where we need to cleanup so that
796 // additional tests can be run.
797 // We must be single threaded... but be careful anyway.
798 InitializeAndSetTrackingStatus(DEACTIVATED);
799
800 ThreadData* thread_data_list;
801 {
802 base::AutoLock lock(*list_lock_.Pointer());
803 thread_data_list = all_thread_data_list_head_;
804 all_thread_data_list_head_ = NULL;
805 ++incarnation_counter_;
806 // To be clean, break apart the retired worker list (though we leak them).
807 while (first_retired_worker_) {
808 ThreadData* worker = first_retired_worker_;
809 CHECK_GT(worker->worker_thread_number_, 0);
810 first_retired_worker_ = worker->next_retired_worker_;
811 worker->next_retired_worker_ = NULL;
812 }
813 }
814
815 // Put most global static back in pristine shape.
816 worker_thread_data_creation_count_ = 0;
817 cleanup_count_ = 0;
818 tls_index_.Set(NULL);
819 status_ = DORMANT_DURING_TESTS; // Almost UNINITIALIZED.
820
821 // To avoid any chance of racing in unit tests, which is the only place we
822 // call this function, we may sometimes leak all the data structures we
823 // recovered, as they may still be in use on threads from prior tests!
824 if (leak) {
825 ThreadData* thread_data = thread_data_list;
826 while (thread_data) {
Daniel Eratb8cf9492015-07-06 13:18:13 -0600827 thread_data = thread_data->next();
828 }
829 return;
830 }
831
832 // When we want to cleanup (on a single thread), here is what we do.
833
834 // Do actual recursive delete in all ThreadData instances.
835 while (thread_data_list) {
836 ThreadData* next_thread_data = thread_data_list;
837 thread_data_list = thread_data_list->next();
838
839 for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
840 next_thread_data->birth_map_.end() != it; ++it)
841 delete it->second; // Delete the Birth Records.
842 delete next_thread_data; // Includes all Death Records.
843 }
844}
845
846//------------------------------------------------------------------------------
847TaskStopwatch::TaskStopwatch()
848 : wallclock_duration_ms_(0),
849 current_thread_data_(NULL),
850 excluded_duration_ms_(0),
851 parent_(NULL) {
852#if DCHECK_IS_ON()
853 state_ = CREATED;
854 child_ = NULL;
855#endif
856}
857
858TaskStopwatch::~TaskStopwatch() {
859#if DCHECK_IS_ON()
860 DCHECK(state_ != RUNNING);
861 DCHECK(child_ == NULL);
862#endif
863}
864
865void TaskStopwatch::Start() {
866#if DCHECK_IS_ON()
867 DCHECK(state_ == CREATED);
868 state_ = RUNNING;
869#endif
870
871 start_time_ = ThreadData::Now();
872
873 current_thread_data_ = ThreadData::Get();
874 if (!current_thread_data_)
875 return;
876
877 parent_ = current_thread_data_->current_stopwatch_;
878#if DCHECK_IS_ON()
879 if (parent_) {
880 DCHECK(parent_->state_ == RUNNING);
881 DCHECK(parent_->child_ == NULL);
882 parent_->child_ = this;
883 }
884#endif
885 current_thread_data_->current_stopwatch_ = this;
886}
887
888void TaskStopwatch::Stop() {
889 const TrackedTime end_time = ThreadData::Now();
890#if DCHECK_IS_ON()
891 DCHECK(state_ == RUNNING);
892 state_ = STOPPED;
893 DCHECK(child_ == NULL);
894#endif
895
896 if (!start_time_.is_null() && !end_time.is_null()) {
897 wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
898 }
899
900 if (!current_thread_data_)
901 return;
902
903 DCHECK(current_thread_data_->current_stopwatch_ == this);
904 current_thread_data_->current_stopwatch_ = parent_;
905 if (!parent_)
906 return;
907
908#if DCHECK_IS_ON()
909 DCHECK(parent_->state_ == RUNNING);
910 DCHECK(parent_->child_ == this);
911 parent_->child_ = NULL;
912#endif
913 parent_->excluded_duration_ms_ += wallclock_duration_ms_;
914 parent_ = NULL;
915}
916
917TrackedTime TaskStopwatch::StartTime() const {
918#if DCHECK_IS_ON()
919 DCHECK(state_ != CREATED);
920#endif
921
922 return start_time_;
923}
924
925int32 TaskStopwatch::RunDurationMs() const {
926#if DCHECK_IS_ON()
927 DCHECK(state_ == STOPPED);
928#endif
929
930 return wallclock_duration_ms_ - excluded_duration_ms_;
931}
932
933ThreadData* TaskStopwatch::GetThreadData() const {
934#if DCHECK_IS_ON()
935 DCHECK(state_ != CREATED);
936#endif
937
938 return current_thread_data_;
939}
940
941//------------------------------------------------------------------------------
942// DeathDataPhaseSnapshot
943
944DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
945 int profiling_phase,
946 int count,
947 int32 run_duration_sum,
948 int32 run_duration_max,
949 int32 run_duration_sample,
950 int32 queue_duration_sum,
951 int32 queue_duration_max,
952 int32 queue_duration_sample,
953 const DeathDataPhaseSnapshot* prev)
954 : profiling_phase(profiling_phase),
955 death_data(count,
956 run_duration_sum,
957 run_duration_max,
958 run_duration_sample,
959 queue_duration_sum,
960 queue_duration_max,
961 queue_duration_sample),
962 prev(prev) {
963}
964
965//------------------------------------------------------------------------------
966// TaskSnapshot
967
968TaskSnapshot::TaskSnapshot() {
969}
970
971TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
972 const DeathDataSnapshot& death_data,
973 const std::string& death_thread_name)
974 : birth(birth),
975 death_data(death_data),
976 death_thread_name(death_thread_name) {
977}
978
979TaskSnapshot::~TaskSnapshot() {
980}
981
982//------------------------------------------------------------------------------
983// ProcessDataPhaseSnapshot
984
985ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
986}
987
988ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
989}
990
991//------------------------------------------------------------------------------
992// ProcessDataPhaseSnapshot
993
994ProcessDataSnapshot::ProcessDataSnapshot()
995#if !defined(OS_NACL)
996 : process_id(base::GetCurrentProcId()) {
997#else
998 : process_id(base::kNullProcessId) {
999#endif
1000}
1001
1002ProcessDataSnapshot::~ProcessDataSnapshot() {
1003}
1004
1005} // namespace tracked_objects