blob: b4dede7ba8f9d0ced58fcc23ccfc94425daf82ae [file] [log] [blame]
license.botf003cfe2008-08-24 09:55:55 +09001// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit3f4a7322008-07-27 06:49:38 +09004
5// Histogram is an object that aggregates statistics, and can summarize them in
6// various forms, including ASCII graphical, HTML, and numerically (as a
7// vector of numbers corresponding to each of the aggregating buckets).
8// See header file for details and examples.
9
10#include "base/histogram.h"
11
12#include <math.h>
13#include <string>
14
15#include "base/logging.h"
16#include "base/scoped_ptr.h"
17#include "base/string_util.h"
18
19typedef Histogram::Count Count;
20
darin@google.com12d40bb2008-08-20 03:36:23 +090021// static
22const int Histogram::kHexRangePrintingFlag = 0x8000;
23
initial.commit3f4a7322008-07-27 06:49:38 +090024Histogram::Histogram(const wchar_t* name, Sample minimum,
25 Sample maximum, size_t bucket_count)
26 : StatsRate(name),
27 histogram_name_(WideToASCII(name)),
28 declared_min_(minimum),
29 declared_max_(maximum),
30 bucket_count_(bucket_count),
31 flags_(0),
32 ranges_(bucket_count + 1, 0),
33 sample_(),
34 registered_(false) {
35 Initialize();
36}
37
38Histogram::Histogram(const wchar_t* name, TimeDelta minimum,
39 TimeDelta maximum, size_t bucket_count)
40 : StatsRate(name),
41 histogram_name_(WideToASCII(name)),
42 declared_min_(static_cast<int> (minimum.InMilliseconds())),
43 declared_max_(static_cast<int> (maximum.InMilliseconds())),
44 bucket_count_(bucket_count),
45 flags_(0),
46 ranges_(bucket_count + 1, 0),
47 sample_(),
48 registered_(false) {
49 Initialize();
50}
51
52Histogram::~Histogram() {
53 if (registered_)
54 StatisticsRecorder::UnRegister(*this);
55 // Just to make sure most derived class did this properly...
56 DCHECK(ValidateBucketRanges());
57}
58
59
60// Hooks to override stats counter methods. This ensures that we gather all
61// input the stats counter sees.
62void Histogram::Add(int value) {
63 if (!registered_)
64 registered_ = StatisticsRecorder::Register(*this);
65 if (value >= kSampleType_MAX)
66 value = kSampleType_MAX - 1;
67 StatsRate::Add(value);
68 if (value < 0)
69 value = 0;
70 size_t index = BucketIndex(value);
71 DCHECK(value >= ranges(index));
72 DCHECK(value < ranges(index + 1));
73 Accumulate(value, 1, index);
74}
75
76// The following methods provide a graphical histogram display.
77void Histogram::WriteHTMLGraph(std::string* output) const {
78 // TBD(jar) Write a nice HTML bar chart, with divs an mouse-overs etc.
79 output->append("<PRE>");
80 WriteAscii(true, "<br>", output);
81 output->append("</PRE>");
82}
83
84void Histogram::WriteAscii(bool graph_it, const std::string& newline,
85 std::string* output) const {
86 // Get local (stack) copies of all effectively volatile class data so that we
87 // are consistent across our output activities.
88 SampleSet snapshot;
89 SnapshotSample(&snapshot);
90 Count sample_count = snapshot.TotalCount();
91
92 WriteAsciiHeader(snapshot, sample_count, output);
93 output->append(newline);
94
95 // Prepare to normalize graphical rendering of bucket contents.
96 double max_size = 0;
97 if (graph_it)
98 max_size = GetPeakBucketSize(snapshot);
99
100 // Calculate space needed to print bucket range numbers. Leave room to print
101 // nearly the largest bucket range without sliding over the histogram.
102 size_t largest_non_empty_bucket = bucket_count_ - 1;
103 while (0 == sample_.counts(largest_non_empty_bucket)) {
104 if (0 == largest_non_empty_bucket)
105 break; // All buckets are empty.
106 largest_non_empty_bucket--;
107 }
108
109 // Calculate largest print width needed for any of our bucket range displays.
110 size_t print_width = 1;
111 for (size_t i = 0; i < bucket_count_; ++i) {
112 if (snapshot.counts(i)) {
113 size_t width = GetAsciiBucketRange(i).size() + 1;
114 if (width > print_width)
115 print_width = width;
116 }
117 }
118
119 int64 remaining = sample_count;
120 int64 past = 0;
121 // Output the actual histogram graph.
122 for (size_t i = 0; i < bucket_count_; i++) {
123 Count current = snapshot.counts(i);
124 if (!current && !PrintEmptyBucket(i))
125 continue;
126 remaining -= current;
127 StringAppendF(output, "%#*s ", print_width, GetAsciiBucketRange(i).c_str());
128 if (0 == current && i < bucket_count_ - 1 && 0 == snapshot.counts(i + 1)) {
129 while (i < bucket_count_ - 1 && 0 == snapshot.counts(i + 1))
130 i++;
131 output->append("... ");
132 output->append(newline);
133 continue; // No reason to plot emptiness.
134 }
135 double current_size = GetBucketSize(current, i);
136 if (graph_it)
137 WriteAsciiBucketGraph(current_size, max_size, output);
138 WriteAsciiBucketContext(past, current, remaining, i, output);
139 output->append(newline);
140 past += current;
141 }
142 DCHECK(past == sample_count);
143}
144
145bool Histogram::ValidateBucketRanges() const {
146 // Standard assertions that all bucket ranges should satisfy.
147 DCHECK(ranges_.size() == bucket_count_ + 1);
148 DCHECK(0 == ranges_[0]);
149 DCHECK(declared_min() == ranges_[1]);
150 DCHECK(declared_max() == ranges_[bucket_count_ - 1]);
151 DCHECK(kSampleType_MAX == ranges_[bucket_count_]);
152 return true;
153}
154
155void Histogram::Initialize() {
156 sample_.Resize(*this);
157 if (declared_min_ <= 0)
158 declared_min_ = 1;
159 if (declared_max_ >= kSampleType_MAX)
160 declared_max_ = kSampleType_MAX - 1;
161 DCHECK(declared_min_ > 0); // We provide underflow bucket.
162 DCHECK(declared_min_ < declared_max_);
163 DCHECK(1 < bucket_count_);
164 size_t maximal_bucket_count = declared_max_ - declared_min_ + 2;
165 DCHECK(bucket_count_ <= maximal_bucket_count);
166 DCHECK(0 == ranges_[0]);
167 ranges_[bucket_count_] = kSampleType_MAX;
168 InitializeBucketRange();
169 DCHECK(ValidateBucketRanges());
170 registered_ = StatisticsRecorder::Register(*this);
171}
172
173// Calculate what range of values are held in each bucket.
174// We have to be careful that we don't pick a ratio between starting points in
175// consecutive buckets that is sooo small, that the integer bounds are the same
176// (effectively making one bucket get no values). We need to avoid:
177// (ranges_[i] == ranges_[i + 1]
178// To avoid that, we just do a fine-grained bucket width as far as we need to
179// until we get a ratio that moves us along at least 2 units at a time. From
180// that bucket onward we do use the exponential growth of buckets.
181void Histogram::InitializeBucketRange() {
182 double log_max = log(static_cast<double>(declared_max()));
183 double log_ratio;
184 double log_next;
185 size_t bucket_index = 1;
186 Sample current = declared_min();
187 SetBucketRange(bucket_index, current);
188 while (bucket_count() > ++bucket_index) {
189 double log_current;
190 log_current = log(static_cast<double>(current));
191 // Calculate the count'th root of the range.
192 log_ratio = (log_max - log_current) / (bucket_count() - bucket_index);
193 // See where the next bucket would start.
194 log_next = log_current + log_ratio;
195 int next;
196 next = static_cast<int>(floor(exp(log_next) + 0.5));
197 if (next > current)
198 current = next;
199 else
200 current++; // Just do a narrow bucket, and keep trying.
201 SetBucketRange(bucket_index, current);
202 }
203
204 DCHECK(bucket_count() == bucket_index);
205}
206
207size_t Histogram::BucketIndex(Sample value) const {
208 // Use simple binary search. This is very general, but there are better
209 // approaches if we knew that the buckets were linearly distributed.
210 DCHECK(ranges(0) <= value);
211 DCHECK(ranges(bucket_count()) > value);
212 size_t under = 0;
213 size_t over = bucket_count();
214 size_t mid;
215
216 do {
217 DCHECK(over >= under);
218 mid = (over + under)/2;
219 if (mid == under)
220 break;
221 if (ranges(mid) <= value)
222 under = mid;
223 else
224 over = mid;
225 } while (true);
226
227 DCHECK(ranges(mid) <= value && ranges(mid+1) > value);
228 return mid;
229}
230
231// Use the actual bucket widths (like a linear histogram) until the widths get
232// over some transition value, and then use that transition width. Exponentials
233// get so big so fast (and we don't expect to see a lot of entries in the large
234// buckets), so we need this to make it possible to see what is going on and
235// not have 0-graphical-height buckets.
236double Histogram::GetBucketSize(Count current, size_t i) const {
237 DCHECK(ranges(i + 1) > ranges(i));
238 static const double kTransitionWidth = 5;
239 double denominator = ranges(i + 1) - ranges(i);
240 if (denominator > kTransitionWidth)
241 denominator = kTransitionWidth; // Stop trying to normalize.
242 return current/denominator;
243}
244
245//------------------------------------------------------------------------------
246// The following two methods can be overridden to provide a thread safe
247// version of this class. The cost of locking is low... but an error in each
248// of these methods has minimal impact. For now, I'll leave this unlocked,
249// and I don't believe I can loose more than a count or two.
250// The vectors are NOT reallocated, so there is no risk of them moving around.
251
252// Update histogram data with new sample.
253void Histogram::Accumulate(Sample value, Count count, size_t index) {
254 // Note locking not done in this version!!!
255 sample_.Accumulate(value, count, index);
256}
257
258// Do a safe atomic snapshot of sample data.
259// This implementation assumes we are on a safe single thread.
260void Histogram::SnapshotSample(SampleSet* sample) const {
261 // Note locking not done in this version!!!
262 *sample = sample_;
263}
264
265//------------------------------------------------------------------------------
266// Accessor methods
267
268void Histogram::SetBucketRange(size_t i, Sample value) {
269 DCHECK(bucket_count_ > i);
270 ranges_[i] = value;
271}
272
273//------------------------------------------------------------------------------
274// Private methods
275
276double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const {
277 double max = 0;
278 for (size_t i = 0; i < bucket_count_ ; i++) {
279 double current_size = GetBucketSize(snapshot.counts(i), i);
280 if (current_size > max)
281 max = current_size;
282 }
283 return max;
284}
285
286void Histogram::WriteAsciiHeader(const SampleSet& snapshot,
287 Count sample_count,
288 std::string* output) const {
289 StringAppendF(output,
290 "Histogram: %s recorded %ld samples",
291 histogram_name().c_str(),
292 sample_count);
293 if (0 == sample_count) {
294 DCHECK(0 == snapshot.sum());
295 } else {
296 double average = static_cast<float>(snapshot.sum()) / sample_count;
297 double variance = static_cast<float>(snapshot.square_sum())/sample_count
298 - average * average;
299 double standard_deviation = sqrt(variance);
300
301 StringAppendF(output,
302 ", average = %.1f, standard deviation = %.1f",
303 average, standard_deviation);
304 }
305 if (flags_ & ~kHexRangePrintingFlag )
306 StringAppendF(output, " (flags = 0x%x)", flags_ & ~kHexRangePrintingFlag);
307}
308
309void Histogram::WriteAsciiBucketContext(const int64 past,
310 const Count current,
311 const int64 remaining,
312 const size_t i,
313 std::string* output) const {
314 double scaled_sum = (past + current + remaining) / 100.0;
315 WriteAsciiBucketValue(current, scaled_sum, output);
316 if (0 < i) {
317 double percentage = past / scaled_sum;
318 StringAppendF(output, " {%3.1f%%}", percentage);
319 }
320}
321
322const std::string Histogram::GetAsciiBucketRange(size_t i) const {
323 std::string result;
324 if (kHexRangePrintingFlag & flags_)
325 StringAppendF(&result, "%#x", ranges_[i]);
326 else
327 StringAppendF(&result, "%d", ranges_[i]);
328 return result;
329}
330
331void Histogram::WriteAsciiBucketValue(Count current, double scaled_sum,
332 std::string* output) const {
333 StringAppendF(output, " (%d = %3.1f%%)", current, current/scaled_sum);
334}
335
336void Histogram::WriteAsciiBucketGraph(double current_size, double max_size,
337 std::string* output) const {
338 const int k_line_length = 72; // Maximal horizontal width of graph.
339 int x_count = static_cast<int>(k_line_length * (current_size / max_size)
340 + 0.5);
341 int x_remainder = k_line_length - x_count;
342
343 while (0 < x_count--)
344 output->append("-");
345 output->append("O");
346 while (0 < x_remainder--)
347 output->append(" ");
348}
349
350//------------------------------------------------------------------------------
351// Methods for the Histogram::SampleSet class
352//------------------------------------------------------------------------------
353
354Histogram::SampleSet::SampleSet()
355 : counts_(),
356 sum_(0),
357 square_sum_(0) {
358}
359
360void Histogram::SampleSet::Resize(const Histogram& histogram) {
361 counts_.resize(histogram.bucket_count(), 0);
362}
363
364void Histogram::SampleSet::CheckSize(const Histogram& histogram) const {
365 DCHECK(counts_.size() == histogram.bucket_count());
366}
367
368
369void Histogram::SampleSet::Accumulate(Sample value, Count count,
370 size_t index) {
371 DCHECK(count == 1 || count == -1);
372 counts_[index] += count;
373 sum_ += count * value;
374 square_sum_ += (count * value) * static_cast<int64>(value);
375 DCHECK(counts_[index] >= 0);
376 DCHECK(sum_ >= 0);
377 DCHECK(square_sum_ >= 0);
378}
379
380Count Histogram::SampleSet::TotalCount() const {
381 Count total = 0;
382 for (Counts::const_iterator it = counts_.begin();
383 it != counts_.end();
384 it++) {
385 total += *it;
386 }
387 return total;
388}
389
390void Histogram::SampleSet::Add(const SampleSet& other) {
391 DCHECK(counts_.size() == other.counts_.size());
392 sum_ += other.sum_;
393 square_sum_ += other.square_sum_;
394 for (size_t index = 0; index < counts_.size(); index++)
395 counts_[index] += other.counts_[index];
396}
397
398void Histogram::SampleSet::Subtract(const SampleSet& other) {
399 DCHECK(counts_.size() == other.counts_.size());
400 // Note: Race conditions in snapshotting a sum or square_sum may lead to
401 // (temporary) negative values when snapshots are later combined (and deltas
402 // calculated). As a result, we don't currently CHCEK() for positive values.
403 sum_ -= other.sum_;
404 square_sum_ -= other.square_sum_;
405 for (size_t index = 0; index < counts_.size(); index++) {
406 counts_[index] -= other.counts_[index];
407 DCHECK(counts_[index] >= 0);
408 }
409}
410
411//------------------------------------------------------------------------------
412// LinearHistogram: This histogram uses a traditional set of evenly spaced
413// buckets.
414//------------------------------------------------------------------------------
415
416LinearHistogram::LinearHistogram(const wchar_t* name,
417 Sample minimum, Sample maximum, size_t bucket_count)
418 : Histogram(name, minimum >= 1 ? minimum : 1, maximum, bucket_count) {
419 InitializeBucketRange();
420 DCHECK(ValidateBucketRanges());
421}
422
423LinearHistogram::LinearHistogram(const wchar_t* name,
424 TimeDelta minimum, TimeDelta maximum, size_t bucket_count)
425 : Histogram(name, minimum >= TimeDelta::FromMilliseconds(1) ?
426 minimum : TimeDelta::FromMilliseconds(1),
427 maximum, bucket_count) {
428 // Do a "better" (different) job at init than a base classes did...
429 InitializeBucketRange();
430 DCHECK(ValidateBucketRanges());
431}
432
433void LinearHistogram::SetRangeDescriptions(const DescriptionPair descriptions[]) {
434 for (int i =0; descriptions[i].description; ++i) {
435 bucket_description_[descriptions[i].sample] = descriptions[i].description;
436 }
437}
438
439const std::string LinearHistogram::GetAsciiBucketRange(size_t i) const {
440 int range = ranges(i);
441 BucketDescriptionMap::const_iterator it = bucket_description_.find(range);
442 if (it == bucket_description_.end())
443 return Histogram::GetAsciiBucketRange(i);
444 return it->second;
445}
446
447bool LinearHistogram::PrintEmptyBucket(size_t index) const {
448 return bucket_description_.find(ranges(index)) == bucket_description_.end();
449}
450
451
452void LinearHistogram::InitializeBucketRange() {
453 DCHECK(0 < declared_min()); // 0 is the underflow bucket here.
454 double min = declared_min();
455 double max = declared_max();
456 size_t i;
457 for (i = 1; i < bucket_count(); i++) {
458 double linear_range = (min * (bucket_count() -1 - i) + max * (i - 1)) /
459 (bucket_count() - 2);
460 SetBucketRange(i, static_cast<int> (linear_range + 0.5));
461 }
462}
463
464// Find bucket to increment for sample value.
465size_t LinearHistogram::BucketIndex(Sample value) const {
466 if (value < declared_min()) return 0;
467 if (value >= declared_max()) return bucket_count() - 1;
468 size_t index;
469 index = static_cast<size_t>(((value - declared_min()) * (bucket_count() - 2))
470 / (declared_max() - declared_min()) + 1);
471 DCHECK(1 <= index && bucket_count() > index);
472 return index;
473}
474
475double LinearHistogram::GetBucketSize(Count current, size_t i) const {
476 DCHECK(ranges(i + 1) > ranges(i));
477 // Adjacent buckets with different widths would have "surprisingly" many (few)
478 // samples in a histogram if we didn't normalize this way.
479 double denominator = ranges(i + 1) - ranges(i);
480 return current/denominator;
481}
482
483//------------------------------------------------------------------------------
484// This section provides implementation for ThreadSafeHistogram.
485//------------------------------------------------------------------------------
486
487ThreadSafeHistogram::ThreadSafeHistogram(const wchar_t* name, Sample minimum,
488 Sample maximum, size_t bucket_count)
489 : Histogram(name, minimum, maximum, bucket_count),
490 lock_() {
491 }
492
493void ThreadSafeHistogram::Remove(int value) {
494 if (value >= kSampleType_MAX)
495 value = kSampleType_MAX - 1;
496 StatsRate::Add(-value);
497 size_t index = BucketIndex(value);
498 Accumulate(value, -1, index);
499}
500
501void ThreadSafeHistogram::Accumulate(Sample value, Count count, size_t index) {
502 AutoLock lock(lock_);
503 Histogram::Accumulate(value, count, index);
504}
505
506void ThreadSafeHistogram::SnapshotSample(SampleSet* sample) {
507 AutoLock lock(lock_);
508 Histogram::SnapshotSample(sample);
509};
510
511
512//------------------------------------------------------------------------------
513// The next section handles global (central) support for all histograms, as well
514// as startup/teardown of this service.
515//------------------------------------------------------------------------------
516
517// This singleton instance should be started during the single threaded portion
518// of main(), and hence it is not thread safe. It initializes globals to
519// provide support for all future calls.
520StatisticsRecorder::StatisticsRecorder() {
521 DCHECK(!histograms_);
522 lock_ = new Lock;
523 histograms_ = new HistogramMap;
524}
525
526StatisticsRecorder::~StatisticsRecorder() {
527 DCHECK(histograms_);
528
529 if (dump_on_exit_) {
530 std::string output;
531 WriteGraph("", &output);
532 LOG(INFO) << output;
533 }
534
535 // Clean up.
536 delete histograms_;
537 histograms_ = NULL;
538 delete lock_;
539 lock_ = NULL;
540}
541
542// static
543bool StatisticsRecorder::WasStarted() {
544 return NULL != histograms_;
545}
546
547// static
548bool StatisticsRecorder::Register(const Histogram& histogram) {
549 if (!histograms_)
550 return false;
551 const std::string name = histogram.histogram_name();
552 AutoLock auto_lock(*lock_);
553 DCHECK(histograms_->end() == histograms_->find(name)); // Only register once.
554 (*histograms_)[name] = &histogram;
555 return true;
556}
557
558// static
559void StatisticsRecorder::UnRegister(const Histogram& histogram) {
560 if (!histograms_)
561 return;
562 const std::string name = histogram.histogram_name();
563 AutoLock auto_lock(*lock_);
564 DCHECK(histograms_->end() != histograms_->find(name));
565 histograms_->erase(name);
566 if (dump_on_exit_) {
567 std::string output;
568 histogram.WriteAscii(true, "\n", &output);
569 LOG(INFO) << output;
570 }
571}
572
573// static
574void StatisticsRecorder::WriteHTMLGraph(const std::string& query,
575 std::string* output) {
576 if (!histograms_)
577 return;
578 output->append("<html><head><title>About Histograms");
579 if (!query.empty())
580 output->append(" - " + query);
581 output->append("</title>"
582 // We'd like the following no-cache... but it doesn't work.
583 // "<META HTTP-EQUIV=\"Pragma\" CONTENT=\"no-cache\">"
584 "</head><body>");
585
586 Histograms snapshot;
587 GetSnapshot(query, &snapshot);
588 for (Histograms::iterator it = snapshot.begin();
589 it != snapshot.end();
590 it++) {
591 (*it)->WriteHTMLGraph(output);
592 output->append("<br><hr><br>");
593 }
594 output->append("</body></html>");
595}
596
597// static
598void StatisticsRecorder::WriteGraph(const std::string& query,
599 std::string* output) {
600 if (!histograms_)
601 return;
602 if (query.length())
603 StringAppendF(output, "Collections of histograms for %s\n", query.c_str());
604 else
605 output->append("Collections of all histograms\n");
606
607 Histograms snapshot;
608 GetSnapshot(query, &snapshot);
609 for (Histograms::iterator it = snapshot.begin();
610 it != snapshot.end();
611 it++) {
612 (*it)->WriteAscii(true, "\n", output);
613 output->append("\n");
614 }
615}
616
617// static
618void StatisticsRecorder::GetHistograms(Histograms* output) {
619 if (!histograms_)
620 return;
621 AutoLock auto_lock(*lock_);
622 for (HistogramMap::iterator it = histograms_->begin();
623 histograms_->end() != it;
624 it++) {
625 output->push_back(it->second);
626 }
627}
628
629// private static
630void StatisticsRecorder::GetSnapshot(const std::string& query,
631 Histograms* snapshot) {
632 AutoLock auto_lock(*lock_);
633 for (HistogramMap::iterator it = histograms_->begin();
634 histograms_->end() != it;
635 it++) {
636 if (it->first.find(query) != std::string::npos)
637 snapshot->push_back(it->second);
638 }
639}
640
641// static
642StatisticsRecorder::HistogramMap* StatisticsRecorder::histograms_ = NULL;
643// static
644Lock* StatisticsRecorder::lock_ = NULL;
645// static
646bool StatisticsRecorder::dump_on_exit_ = false;
license.botf003cfe2008-08-24 09:55:55 +0900647