blob: d1f43afe4ae4e2e2b886dd24efe719bf40f13679 [file] [log] [blame]
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001/** @file kmp_stats.cpp
2 * Statistics gathering and processing.
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
Jim Cownie4cc4bb42014-10-07 16:25:50 +000015#include "kmp.h"
16#include "kmp_str.h"
17#include "kmp_lock.h"
18#include "kmp_stats.h"
19
20#include <algorithm>
21#include <sstream>
22#include <iomanip>
23#include <stdlib.h> // for atexit
24
25#define STRINGIZE2(x) #x
26#define STRINGIZE(x) STRINGIZE2(x)
27
28#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
29statInfo timeStat::timerInfo[] = {
30 KMP_FOREACH_TIMER(expandName,0)
31 {0,0}
32};
33const statInfo counter::counterInfo[] = {
34 KMP_FOREACH_COUNTER(expandName,0)
35 {0,0}
36};
37#undef expandName
38
39#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0},
40kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
41 KMP_FOREACH_TIMER(expandName,0)
42 {0.0,0.0,0.0}
43};
44#undef expandName
45
46const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
47 {1.0, 0.0, 0.0}, // red
48 {1.0, 0.6, 0.0}, // orange
49 {1.0, 1.0, 0.0}, // yellow
50 {0.0, 1.0, 0.0}, // green
51 {0.0, 0.0, 1.0}, // blue
52 {0.6, 0.2, 0.8}, // purple
53 {1.0, 0.0, 1.0}, // magenta
54 {0.0, 0.4, 0.2}, // dark green
55 {1.0, 1.0, 0.6}, // light yellow
56 {0.6, 0.4, 0.6}, // dirty purple
57 {0.0, 1.0, 1.0}, // cyan
58 {1.0, 0.4, 0.8}, // pink
59 {0.5, 0.5, 0.5}, // grey
60 {0.8, 0.7, 0.5}, // brown
61 {0.6, 0.6, 1.0}, // light blue
62 {1.0, 0.7, 0.5}, // peach
63 {0.8, 0.5, 1.0}, // lavender
64 {0.6, 0.0, 0.0}, // dark red
65 {0.7, 0.6, 0.0}, // gold
66 {0.0, 0.0, 0.0} // black
67};
68
69// Ensure that the atexit handler only runs once.
70static uint32_t statsPrinted = 0;
71
72// output interface
73static kmp_stats_output_module __kmp_stats_global_output;
74
75/* ****************************************************** */
76/* ************* statistic member functions ************* */
77
78void statistic::addSample(double sample)
79{
80 double delta = sample - meanVal;
81
82 sampleCount = sampleCount + 1;
83 meanVal = meanVal + delta/sampleCount;
84 m2 = m2 + delta*(sample - meanVal);
85
86 minVal = std::min(minVal, sample);
87 maxVal = std::max(maxVal, sample);
88}
89
90statistic & statistic::operator+= (const statistic & other)
91{
92 if (sampleCount == 0)
93 {
94 *this = other;
95 return *this;
96 }
97
98 uint64_t newSampleCount = sampleCount + other.sampleCount;
99 double dnsc = double(newSampleCount);
100 double dsc = double(sampleCount);
101 double dscBydnsc = dsc/dnsc;
102 double dosc = double(other.sampleCount);
103 double delta = other.meanVal - meanVal;
104
105 // Try to order these calculations to avoid overflows.
106 // If this were Fortran, then the compiler would not be able to re-order over brackets.
107 // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition
108 // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
109 // really is associative (which floating addition isn't...)).
110 meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
111 m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
112 minVal = std::min (minVal, other.minVal);
113 maxVal = std::max (maxVal, other.maxVal);
114 sampleCount = newSampleCount;
115
116
117 return *this;
118}
119
120void statistic::scale(double factor)
121{
122 minVal = minVal*factor;
123 maxVal = maxVal*factor;
124 meanVal= meanVal*factor;
125 m2 = m2*factor*factor;
126 return;
127}
128
129std::string statistic::format(char unit, bool total) const
130{
131 std::string result = formatSI(sampleCount,9,' ');
132
133 result = result + std::string(", ") + formatSI(minVal, 9, unit);
134 result = result + std::string(", ") + formatSI(meanVal, 9, unit);
135 result = result + std::string(", ") + formatSI(maxVal, 9, unit);
136 if (total)
137 result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
138 result = result + std::string(", ") + formatSI(getSD(), 9, unit);
139
140 return result;
141}
142
143/* ********************************************************** */
144/* ************* explicitTimer member functions ************* */
145
146void explicitTimer::start(timer_e timerEnumValue) {
147 startTime = tsc_tick_count::now();
148 if(timeStat::logEvent(timerEnumValue)) {
149 __kmp_stats_thread_ptr->incrementNestValue();
150 }
151 return;
152}
153
154void explicitTimer::stop(timer_e timerEnumValue) {
155 if (startTime.getValue() == 0)
156 return;
157
158 tsc_tick_count finishTime = tsc_tick_count::now();
159
160 //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
161 stat->addSample ((finishTime - startTime).ticks());
162
163 if(timeStat::logEvent(timerEnumValue)) {
164 __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
165 __kmp_stats_thread_ptr->decrementNestValue();
166 }
167
168 /* We accept the risk that we drop a sample because it really did start at t==0. */
169 startTime = 0;
170 return;
171}
172
173/* ******************************************************************* */
174/* ************* kmp_stats_event_vector member functions ************* */
175
176void kmp_stats_event_vector::deallocate() {
177 __kmp_free(events);
178 internal_size = 0;
179 allocated_size = 0;
180 events = NULL;
181}
182
183// This function is for qsort() which requires the compare function to return
184// either a negative number if event1 < event2, a positive number if event1 > event2
185// or zero if event1 == event2.
186// This sorts by start time (lowest to highest).
187int compare_two_events(const void* event1, const void* event2) {
188 kmp_stats_event* ev1 = (kmp_stats_event*)event1;
189 kmp_stats_event* ev2 = (kmp_stats_event*)event2;
190
191 if(ev1->getStart() < ev2->getStart()) return -1;
192 else if(ev1->getStart() > ev2->getStart()) return 1;
193 else return 0;
194}
195
196void kmp_stats_event_vector::sort() {
197 qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
198}
199
200/* *********************************************************** */
201/* ************* kmp_stats_list member functions ************* */
202
203// returns a pointer to newly created stats node
204kmp_stats_list* kmp_stats_list::push_back(int gtid) {
205 kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
206 // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used)
207 new (newnode) kmp_stats_list();
208 newnode->setGtid(gtid);
209 newnode->prev = this->prev;
210 newnode->next = this;
211 newnode->prev->next = newnode;
212 newnode->next->prev = newnode;
213 return newnode;
214}
215void kmp_stats_list::deallocate() {
216 kmp_stats_list* ptr = this->next;
217 kmp_stats_list* delptr = this->next;
218 while(ptr != this) {
219 delptr = ptr;
220 ptr=ptr->next;
221 // placement new means we have to explicitly call destructor.
222 delptr->_event_vector.deallocate();
223 delptr->~kmp_stats_list();
224 __kmp_free(delptr);
225 }
226}
227kmp_stats_list::iterator kmp_stats_list::begin() {
228 kmp_stats_list::iterator it;
229 it.ptr = this->next;
230 return it;
231}
232kmp_stats_list::iterator kmp_stats_list::end() {
233 kmp_stats_list::iterator it;
234 it.ptr = this;
235 return it;
236}
237int kmp_stats_list::size() {
238 int retval;
239 kmp_stats_list::iterator it;
240 for(retval=0, it=begin(); it!=end(); it++, retval++) {}
241 return retval;
242}
243
244/* ********************************************************************* */
245/* ************* kmp_stats_list::iterator member functions ************* */
246
247kmp_stats_list::iterator::iterator() : ptr(NULL) {}
248kmp_stats_list::iterator::~iterator() {}
249kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
250 this->ptr = this->ptr->next;
251 return *this;
252}
253kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
254 this->ptr = this->ptr->next;
255 return *this;
256}
257kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
258 this->ptr = this->ptr->prev;
259 return *this;
260}
261kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
262 this->ptr = this->ptr->prev;
263 return *this;
264}
265bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
266 return this->ptr!=rhs.ptr;
267}
268bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
269 return this->ptr==rhs.ptr;
270}
271kmp_stats_list* kmp_stats_list::iterator::operator*() const {
272 return this->ptr;
273}
274
275/* *************************************************************** */
276/* ************* kmp_stats_output_module functions ************** */
277
278const char* kmp_stats_output_module::outputFileName = NULL;
279const char* kmp_stats_output_module::eventsFileName = NULL;
280const char* kmp_stats_output_module::plotFileName = NULL;
281int kmp_stats_output_module::printPerThreadFlag = 0;
282int kmp_stats_output_module::printPerThreadEventsFlag = 0;
283
284// init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output
285void kmp_stats_output_module::init()
286{
287 char * statsFileName = getenv("KMP_STATS_FILE");
288 eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
289 plotFileName = getenv("KMP_STATS_PLOT_FILE");
290 char * threadStats = getenv("KMP_STATS_THREADS");
291 char * threadEvents = getenv("KMP_STATS_EVENTS");
292
293 // set the stats output filenames based on environment variables and defaults
294 outputFileName = statsFileName;
295 eventsFileName = eventsFileName ? eventsFileName : "events.dat";
296 plotFileName = plotFileName ? plotFileName : "events.plt";
297
298 // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
299 printPerThreadFlag = __kmp_str_match_true(threadStats);
300 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
301
302 if(printPerThreadEventsFlag) {
303 // assigns a color to each timer for printing
304 setupEventColors();
305 } else {
306 // will clear flag so that no event will be logged
307 timeStat::clearEventFlags();
308 }
309
310 return;
311}
312
313void kmp_stats_output_module::setupEventColors() {
314 int i;
315 int globalColorIndex = 0;
316 int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
317 for(i=0;i<TIMER_LAST;i++) {
318 if(timeStat::logEvent((timer_e)i)) {
319 timerColorInfo[i] = globalColorArray[globalColorIndex];
320 globalColorIndex = (globalColorIndex+1)%numGlobalColors;
321 }
322 }
323 return;
324}
325
326void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers)
327{
328 if (areTimers)
329 {
330 // Check if we have useful timers, since we don't print zero value timers we need to avoid
331 // printing a header and then no data.
332 bool haveTimers = false;
333 for (int s = 0; s<TIMER_LAST; s++)
334 {
335 if (theStats[s].getCount() != 0)
336 {
337 haveTimers = true;
338 break;
339 }
340 }
341 if (!haveTimers)
342 return;
343 }
344
345 // Print
346 const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,";
347 fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title);
348 if (areTimers) {
349 for (int s = 0; s<TIMER_LAST; s++) {
350 statistic const * stat = &theStats[s];
351 if (stat->getCount() != 0) {
352 char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
353 fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str());
354 }
355 }
356 } else { // Counters
357 for (int s = 0; s<COUNTER_LAST; s++) {
358 statistic const * stat = &theStats[s];
359 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
360 }
361 }
362}
363
364void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
365{
366 // We print all the counters even if they are zero.
367 // That makes it easier to slice them into a spreadsheet if you need to.
368 fprintf (statsOut, "\nCounter, Count\n");
369 for (int c = 0; c<COUNTER_LAST; c++) {
370 counter const * stat = &theCounters[c];
371 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
372 }
373}
374
375void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
376 // sort by start time before printing
377 theEvents->sort();
378 for (int i = 0; i < theEvents->size(); i++) {
379 kmp_stats_event ev = theEvents->at(i);
380 rgb_color color = getEventColor(ev.getTimerName());
381 fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
382 gtid,
383 ev.getStart(),
384 ev.getStop(),
385 1.2 - (ev.getNestLevel() * 0.2),
386 color.r, color.g, color.b,
387 timeStat::name(ev.getTimerName())
388 );
389 }
390 return;
391}
392
393void kmp_stats_output_module::windupExplicitTimers()
394{
395 // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads
396 // and say "it's over".
397 // If the timer wasn't running, this won't record anything anyway.
398 kmp_stats_list::iterator it;
399 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
400 for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
401 (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
402 }
403 }
404}
405
406void kmp_stats_output_module::printPloticusFile() {
407 int i;
408 int size = __kmp_stats_list.size();
409 FILE* plotOut = fopen(plotFileName, "w+");
410
411 fprintf(plotOut, "#proc page\n"
412 " pagesize: 15 10\n"
413 " scale: 1.0\n\n");
414
415 fprintf(plotOut, "#proc getdata\n"
416 " file: %s\n\n",
417 eventsFileName);
418
419 fprintf(plotOut, "#proc areadef\n"
420 " title: OpenMP Sampling Timeline\n"
421 " titledetails: align=center size=16\n"
422 " rectangle: 1 1 13 9\n"
423 " xautorange: datafield=2,3\n"
424 " yautorange: -1 %d\n\n",
425 size);
426
427 fprintf(plotOut, "#proc xaxis\n"
428 " stubs: inc\n"
429 " stubdetails: size=12\n"
430 " label: Time (ticks)\n"
431 " labeldetails: size=14\n\n");
432
433 fprintf(plotOut, "#proc yaxis\n"
434 " stubs: inc 1\n"
435 " stubrange: 0 %d\n"
436 " stubdetails: size=12\n"
437 " label: Thread #\n"
438 " labeldetails: size=14\n\n",
439 size-1);
440
441 fprintf(plotOut, "#proc bars\n"
442 " exactcolorfield: 5\n"
443 " axis: x\n"
444 " locfield: 1\n"
445 " segmentfields: 2 3\n"
446 " barwidthfield: 4\n\n");
447
448 // create legend entries corresponding to the timer color
449 for(i=0;i<TIMER_LAST;i++) {
450 if(timeStat::logEvent((timer_e)i)) {
451 rgb_color c = getEventColor((timer_e)i);
452 fprintf(plotOut, "#proc legendentry\n"
453 " sampletype: color\n"
454 " label: %s\n"
455 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
456 timeStat::name((timer_e)i),
457 c.r, c.g, c.b);
458
459 }
460 }
461
462 fprintf(plotOut, "#proc legend\n"
463 " format: down\n"
464 " location: max max\n\n");
465 fclose(plotOut);
466 return;
467}
468
469void kmp_stats_output_module::outputStats(const char* heading)
470{
471 statistic allStats[TIMER_LAST];
472 statistic allCounters[COUNTER_LAST];
473
474 // stop all the explicit timers for all threads
475 windupExplicitTimers();
476
477 FILE * eventsOut;
478 FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr;
479
480 if (eventPrintingEnabled()) {
481 eventsOut = fopen(eventsFileName, "w+");
482 }
483
484 if (!statsOut)
485 statsOut = stderr;
486
487 fprintf(statsOut, "%s\n",heading);
488 // Accumulate across threads.
489 kmp_stats_list::iterator it;
490 for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
491 int t = (*it)->getGtid();
492 // Output per thread stats if requested.
493 if (perThreadPrintingEnabled()) {
494 fprintf (statsOut, "Thread %d\n", t);
495 printStats(statsOut, (*it)->getTimers(), true);
496 printCounters(statsOut, (*it)->getCounters());
497 fprintf(statsOut,"\n");
498 }
499 // Output per thread events if requested.
500 if (eventPrintingEnabled()) {
501 kmp_stats_event_vector events = (*it)->getEventVector();
502 printEvents(eventsOut, &events, t);
503 }
504
505 for (int s = 0; s<TIMER_LAST; s++) {
506 // See if we should ignore this timer when aggregating
507 if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
508 (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master
509 timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it.
510 )
511 {
512 continue;
513 }
514
515 statistic * threadStat = (*it)->getTimer(timer_e(s));
516 allStats[s] += *threadStat;
517 }
518
519 // Special handling for synthesized statistics.
520 // These just have to be coded specially here for now.
Jonathan Peyton45be4502015-08-11 21:36:41 +0000521 // At present we only have a few:
522 // The total parallel work done in each thread.
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000523 // The variance here makes it easy to see load imbalance over the whole program (though, of course,
524 // it's possible to have a code with awful load balance in every parallel region but perfect load
525 // balance oever the whole program.)
Jonathan Peyton45be4502015-08-11 21:36:41 +0000526 // The time spent in barriers in each thread.
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000527 allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
528
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000529 // Time in explicit barriers.
530 allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
531
532 for (int c = 0; c<COUNTER_LAST; c++) {
533 if (counter::masterOnly(counter_e(c)) && t != 0)
534 continue;
535 allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue());
536 }
537 }
538
539 if (eventPrintingEnabled()) {
540 printPloticusFile();
541 fclose(eventsOut);
542 }
543
544 fprintf (statsOut, "Aggregate for all threads\n");
545 printStats (statsOut, &allStats[0], true);
546 fprintf (statsOut, "\n");
547 printStats (statsOut, &allCounters[0], false);
548
549 if (statsOut != stderr)
550 fclose(statsOut);
551
552}
553
554/* ************************************************** */
555/* ************* exported C functions ************** */
556
557// no name mangling for these functions, we want the c files to be able to get at these functions
558extern "C" {
559
560void __kmp_reset_stats()
561{
562 kmp_stats_list::iterator it;
563 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
564 timeStat * timers = (*it)->getTimers();
565 counter * counters = (*it)->getCounters();
566 explicitTimer * eTimers = (*it)->getExplicitTimers();
567
568 for (int t = 0; t<TIMER_LAST; t++)
569 timers[t].reset();
570
571 for (int c = 0; c<COUNTER_LAST; c++)
572 counters[c].reset();
573
574 for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
575 eTimers[t].reset();
576
577 // reset the event vector so all previous events are "erased"
578 (*it)->resetEventVector();
579
580 // May need to restart the explicit timers in thread zero?
581 }
582 KMP_START_EXPLICIT_TIMER(OMP_serial);
583 KMP_START_EXPLICIT_TIMER(OMP_start_end);
584}
585
586// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
587void __kmp_output_stats(const char * heading)
588{
589 __kmp_stats_global_output.outputStats(heading);
590 __kmp_reset_stats();
591}
592
593void __kmp_accumulate_stats_at_exit(void)
594{
595 // Only do this once.
596 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
597 return;
598
599 __kmp_output_stats("Statistics on exit");
600 return;
601}
602
603void __kmp_stats_init(void)
604{
605 return;
606}
607
608} // extern "C"
609