blob: b249e2e635c565a59a6cc0a79090d07f3794381b [file] [log] [blame]
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001/** @file kmp_stats.cpp
2 * Statistics gathering and processing.
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
Jim Cownie4cc4bb42014-10-07 16:25:50 +000015#include "kmp.h"
16#include "kmp_str.h"
17#include "kmp_lock.h"
18#include "kmp_stats.h"
19
20#include <algorithm>
21#include <sstream>
22#include <iomanip>
23#include <stdlib.h> // for atexit
24
25#define STRINGIZE2(x) #x
26#define STRINGIZE(x) STRINGIZE2(x)
27
28#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
29statInfo timeStat::timerInfo[] = {
30 KMP_FOREACH_TIMER(expandName,0)
31 {0,0}
32};
33const statInfo counter::counterInfo[] = {
34 KMP_FOREACH_COUNTER(expandName,0)
35 {0,0}
36};
37#undef expandName
38
39#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0},
40kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
41 KMP_FOREACH_TIMER(expandName,0)
42 {0.0,0.0,0.0}
43};
44#undef expandName
45
46const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
47 {1.0, 0.0, 0.0}, // red
48 {1.0, 0.6, 0.0}, // orange
49 {1.0, 1.0, 0.0}, // yellow
50 {0.0, 1.0, 0.0}, // green
51 {0.0, 0.0, 1.0}, // blue
52 {0.6, 0.2, 0.8}, // purple
53 {1.0, 0.0, 1.0}, // magenta
54 {0.0, 0.4, 0.2}, // dark green
55 {1.0, 1.0, 0.6}, // light yellow
56 {0.6, 0.4, 0.6}, // dirty purple
57 {0.0, 1.0, 1.0}, // cyan
58 {1.0, 0.4, 0.8}, // pink
59 {0.5, 0.5, 0.5}, // grey
60 {0.8, 0.7, 0.5}, // brown
61 {0.6, 0.6, 1.0}, // light blue
62 {1.0, 0.7, 0.5}, // peach
63 {0.8, 0.5, 1.0}, // lavender
64 {0.6, 0.0, 0.0}, // dark red
65 {0.7, 0.6, 0.0}, // gold
66 {0.0, 0.0, 0.0} // black
67};
68
69// Ensure that the atexit handler only runs once.
70static uint32_t statsPrinted = 0;
71
72// output interface
73static kmp_stats_output_module __kmp_stats_global_output;
74
75/* ****************************************************** */
76/* ************* statistic member functions ************* */
77
78void statistic::addSample(double sample)
79{
80 double delta = sample - meanVal;
81
82 sampleCount = sampleCount + 1;
83 meanVal = meanVal + delta/sampleCount;
84 m2 = m2 + delta*(sample - meanVal);
85
86 minVal = std::min(minVal, sample);
87 maxVal = std::max(maxVal, sample);
88}
89
90statistic & statistic::operator+= (const statistic & other)
91{
92 if (sampleCount == 0)
93 {
94 *this = other;
95 return *this;
96 }
97
98 uint64_t newSampleCount = sampleCount + other.sampleCount;
99 double dnsc = double(newSampleCount);
100 double dsc = double(sampleCount);
101 double dscBydnsc = dsc/dnsc;
102 double dosc = double(other.sampleCount);
103 double delta = other.meanVal - meanVal;
104
105 // Try to order these calculations to avoid overflows.
106 // If this were Fortran, then the compiler would not be able to re-order over brackets.
107 // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition
108 // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
109 // really is associative (which floating addition isn't...)).
110 meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
111 m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
112 minVal = std::min (minVal, other.minVal);
113 maxVal = std::max (maxVal, other.maxVal);
114 sampleCount = newSampleCount;
115
116
117 return *this;
118}
119
120void statistic::scale(double factor)
121{
122 minVal = minVal*factor;
123 maxVal = maxVal*factor;
124 meanVal= meanVal*factor;
125 m2 = m2*factor*factor;
126 return;
127}
128
129std::string statistic::format(char unit, bool total) const
130{
131 std::string result = formatSI(sampleCount,9,' ');
Jonathan Peytonc1a7c972016-03-03 21:24:13 +0000132
133 if (sampleCount == 0)
134 {
135 result = result + std::string(", ") + formatSI(0.0, 9, unit);
136 result = result + std::string(", ") + formatSI(0.0, 9, unit);
137 result = result + std::string(", ") + formatSI(0.0, 9, unit);
138 if (total)
139 result = result + std::string(", ") + formatSI(0.0, 9, unit);
140 result = result + std::string(", ") + formatSI(0.0, 9, unit);
141 }
142 else
143 {
144 result = result + std::string(", ") + formatSI(minVal, 9, unit);
145 result = result + std::string(", ") + formatSI(meanVal, 9, unit);
146 result = result + std::string(", ") + formatSI(maxVal, 9, unit);
147 if (total)
148 result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
149 result = result + std::string(", ") + formatSI(getSD(), 9, unit);
150 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000151 return result;
152}
153
154/* ********************************************************** */
155/* ************* explicitTimer member functions ************* */
156
157void explicitTimer::start(timer_e timerEnumValue) {
158 startTime = tsc_tick_count::now();
159 if(timeStat::logEvent(timerEnumValue)) {
160 __kmp_stats_thread_ptr->incrementNestValue();
161 }
162 return;
163}
164
165void explicitTimer::stop(timer_e timerEnumValue) {
166 if (startTime.getValue() == 0)
167 return;
168
169 tsc_tick_count finishTime = tsc_tick_count::now();
170
171 //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
172 stat->addSample ((finishTime - startTime).ticks());
173
174 if(timeStat::logEvent(timerEnumValue)) {
175 __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
176 __kmp_stats_thread_ptr->decrementNestValue();
177 }
178
179 /* We accept the risk that we drop a sample because it really did start at t==0. */
180 startTime = 0;
181 return;
182}
183
184/* ******************************************************************* */
185/* ************* kmp_stats_event_vector member functions ************* */
186
187void kmp_stats_event_vector::deallocate() {
188 __kmp_free(events);
189 internal_size = 0;
190 allocated_size = 0;
191 events = NULL;
192}
193
194// This function is for qsort() which requires the compare function to return
195// either a negative number if event1 < event2, a positive number if event1 > event2
196// or zero if event1 == event2.
197// This sorts by start time (lowest to highest).
198int compare_two_events(const void* event1, const void* event2) {
199 kmp_stats_event* ev1 = (kmp_stats_event*)event1;
200 kmp_stats_event* ev2 = (kmp_stats_event*)event2;
201
202 if(ev1->getStart() < ev2->getStart()) return -1;
203 else if(ev1->getStart() > ev2->getStart()) return 1;
204 else return 0;
205}
206
207void kmp_stats_event_vector::sort() {
208 qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
209}
210
211/* *********************************************************** */
212/* ************* kmp_stats_list member functions ************* */
213
214// returns a pointer to newly created stats node
215kmp_stats_list* kmp_stats_list::push_back(int gtid) {
216 kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
217 // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used)
218 new (newnode) kmp_stats_list();
219 newnode->setGtid(gtid);
220 newnode->prev = this->prev;
221 newnode->next = this;
222 newnode->prev->next = newnode;
223 newnode->next->prev = newnode;
224 return newnode;
225}
226void kmp_stats_list::deallocate() {
227 kmp_stats_list* ptr = this->next;
228 kmp_stats_list* delptr = this->next;
229 while(ptr != this) {
230 delptr = ptr;
231 ptr=ptr->next;
232 // placement new means we have to explicitly call destructor.
233 delptr->_event_vector.deallocate();
234 delptr->~kmp_stats_list();
235 __kmp_free(delptr);
236 }
237}
238kmp_stats_list::iterator kmp_stats_list::begin() {
239 kmp_stats_list::iterator it;
240 it.ptr = this->next;
241 return it;
242}
243kmp_stats_list::iterator kmp_stats_list::end() {
244 kmp_stats_list::iterator it;
245 it.ptr = this;
246 return it;
247}
248int kmp_stats_list::size() {
249 int retval;
250 kmp_stats_list::iterator it;
251 for(retval=0, it=begin(); it!=end(); it++, retval++) {}
252 return retval;
253}
254
255/* ********************************************************************* */
256/* ************* kmp_stats_list::iterator member functions ************* */
257
258kmp_stats_list::iterator::iterator() : ptr(NULL) {}
259kmp_stats_list::iterator::~iterator() {}
260kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
261 this->ptr = this->ptr->next;
262 return *this;
263}
264kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
265 this->ptr = this->ptr->next;
266 return *this;
267}
268kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
269 this->ptr = this->ptr->prev;
270 return *this;
271}
272kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
273 this->ptr = this->ptr->prev;
274 return *this;
275}
276bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
277 return this->ptr!=rhs.ptr;
278}
279bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
280 return this->ptr==rhs.ptr;
281}
282kmp_stats_list* kmp_stats_list::iterator::operator*() const {
283 return this->ptr;
284}
285
286/* *************************************************************** */
287/* ************* kmp_stats_output_module functions ************** */
288
289const char* kmp_stats_output_module::outputFileName = NULL;
290const char* kmp_stats_output_module::eventsFileName = NULL;
291const char* kmp_stats_output_module::plotFileName = NULL;
292int kmp_stats_output_module::printPerThreadFlag = 0;
293int kmp_stats_output_module::printPerThreadEventsFlag = 0;
294
295// init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output
296void kmp_stats_output_module::init()
297{
298 char * statsFileName = getenv("KMP_STATS_FILE");
299 eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
300 plotFileName = getenv("KMP_STATS_PLOT_FILE");
301 char * threadStats = getenv("KMP_STATS_THREADS");
302 char * threadEvents = getenv("KMP_STATS_EVENTS");
303
304 // set the stats output filenames based on environment variables and defaults
305 outputFileName = statsFileName;
306 eventsFileName = eventsFileName ? eventsFileName : "events.dat";
307 plotFileName = plotFileName ? plotFileName : "events.plt";
308
309 // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
310 printPerThreadFlag = __kmp_str_match_true(threadStats);
311 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
312
313 if(printPerThreadEventsFlag) {
314 // assigns a color to each timer for printing
315 setupEventColors();
316 } else {
317 // will clear flag so that no event will be logged
318 timeStat::clearEventFlags();
319 }
320
321 return;
322}
323
324void kmp_stats_output_module::setupEventColors() {
325 int i;
326 int globalColorIndex = 0;
327 int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
328 for(i=0;i<TIMER_LAST;i++) {
329 if(timeStat::logEvent((timer_e)i)) {
330 timerColorInfo[i] = globalColorArray[globalColorIndex];
331 globalColorIndex = (globalColorIndex+1)%numGlobalColors;
332 }
333 }
334 return;
335}
336
337void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers)
338{
339 if (areTimers)
340 {
341 // Check if we have useful timers, since we don't print zero value timers we need to avoid
342 // printing a header and then no data.
343 bool haveTimers = false;
344 for (int s = 0; s<TIMER_LAST; s++)
345 {
346 if (theStats[s].getCount() != 0)
347 {
348 haveTimers = true;
349 break;
350 }
351 }
352 if (!haveTimers)
353 return;
354 }
355
356 // Print
357 const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,";
358 fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title);
359 if (areTimers) {
360 for (int s = 0; s<TIMER_LAST; s++) {
361 statistic const * stat = &theStats[s];
362 if (stat->getCount() != 0) {
363 char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
364 fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str());
365 }
366 }
367 } else { // Counters
368 for (int s = 0; s<COUNTER_LAST; s++) {
369 statistic const * stat = &theStats[s];
370 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
371 }
372 }
373}
374
375void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
376{
377 // We print all the counters even if they are zero.
378 // That makes it easier to slice them into a spreadsheet if you need to.
379 fprintf (statsOut, "\nCounter, Count\n");
380 for (int c = 0; c<COUNTER_LAST; c++) {
381 counter const * stat = &theCounters[c];
382 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
383 }
384}
385
386void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
387 // sort by start time before printing
388 theEvents->sort();
389 for (int i = 0; i < theEvents->size(); i++) {
390 kmp_stats_event ev = theEvents->at(i);
391 rgb_color color = getEventColor(ev.getTimerName());
392 fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
393 gtid,
394 ev.getStart(),
395 ev.getStop(),
396 1.2 - (ev.getNestLevel() * 0.2),
397 color.r, color.g, color.b,
398 timeStat::name(ev.getTimerName())
399 );
400 }
401 return;
402}
403
404void kmp_stats_output_module::windupExplicitTimers()
405{
406 // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads
407 // and say "it's over".
408 // If the timer wasn't running, this won't record anything anyway.
409 kmp_stats_list::iterator it;
410 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
411 for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
412 (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
413 }
414 }
415}
416
417void kmp_stats_output_module::printPloticusFile() {
418 int i;
419 int size = __kmp_stats_list.size();
420 FILE* plotOut = fopen(plotFileName, "w+");
421
422 fprintf(plotOut, "#proc page\n"
423 " pagesize: 15 10\n"
424 " scale: 1.0\n\n");
425
426 fprintf(plotOut, "#proc getdata\n"
427 " file: %s\n\n",
428 eventsFileName);
429
430 fprintf(plotOut, "#proc areadef\n"
431 " title: OpenMP Sampling Timeline\n"
432 " titledetails: align=center size=16\n"
433 " rectangle: 1 1 13 9\n"
434 " xautorange: datafield=2,3\n"
435 " yautorange: -1 %d\n\n",
436 size);
437
438 fprintf(plotOut, "#proc xaxis\n"
439 " stubs: inc\n"
440 " stubdetails: size=12\n"
441 " label: Time (ticks)\n"
442 " labeldetails: size=14\n\n");
443
444 fprintf(plotOut, "#proc yaxis\n"
445 " stubs: inc 1\n"
446 " stubrange: 0 %d\n"
447 " stubdetails: size=12\n"
448 " label: Thread #\n"
449 " labeldetails: size=14\n\n",
450 size-1);
451
452 fprintf(plotOut, "#proc bars\n"
453 " exactcolorfield: 5\n"
454 " axis: x\n"
455 " locfield: 1\n"
456 " segmentfields: 2 3\n"
457 " barwidthfield: 4\n\n");
458
459 // create legend entries corresponding to the timer color
460 for(i=0;i<TIMER_LAST;i++) {
461 if(timeStat::logEvent((timer_e)i)) {
462 rgb_color c = getEventColor((timer_e)i);
463 fprintf(plotOut, "#proc legendentry\n"
464 " sampletype: color\n"
465 " label: %s\n"
466 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
467 timeStat::name((timer_e)i),
468 c.r, c.g, c.b);
469
470 }
471 }
472
473 fprintf(plotOut, "#proc legend\n"
474 " format: down\n"
475 " location: max max\n\n");
476 fclose(plotOut);
477 return;
478}
479
480void kmp_stats_output_module::outputStats(const char* heading)
481{
482 statistic allStats[TIMER_LAST];
483 statistic allCounters[COUNTER_LAST];
484
485 // stop all the explicit timers for all threads
486 windupExplicitTimers();
487
488 FILE * eventsOut;
489 FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr;
490
491 if (eventPrintingEnabled()) {
492 eventsOut = fopen(eventsFileName, "w+");
493 }
494
495 if (!statsOut)
496 statsOut = stderr;
497
498 fprintf(statsOut, "%s\n",heading);
499 // Accumulate across threads.
500 kmp_stats_list::iterator it;
501 for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
502 int t = (*it)->getGtid();
503 // Output per thread stats if requested.
504 if (perThreadPrintingEnabled()) {
505 fprintf (statsOut, "Thread %d\n", t);
506 printStats(statsOut, (*it)->getTimers(), true);
507 printCounters(statsOut, (*it)->getCounters());
508 fprintf(statsOut,"\n");
509 }
510 // Output per thread events if requested.
511 if (eventPrintingEnabled()) {
512 kmp_stats_event_vector events = (*it)->getEventVector();
513 printEvents(eventsOut, &events, t);
514 }
515
516 for (int s = 0; s<TIMER_LAST; s++) {
517 // See if we should ignore this timer when aggregating
518 if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
519 (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master
520 timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it.
521 )
522 {
523 continue;
524 }
525
526 statistic * threadStat = (*it)->getTimer(timer_e(s));
527 allStats[s] += *threadStat;
528 }
529
530 // Special handling for synthesized statistics.
531 // These just have to be coded specially here for now.
Jonathan Peyton45be4502015-08-11 21:36:41 +0000532 // At present we only have a few:
533 // The total parallel work done in each thread.
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000534 // The variance here makes it easy to see load imbalance over the whole program (though, of course,
535 // it's possible to have a code with awful load balance in every parallel region but perfect load
536 // balance oever the whole program.)
Jonathan Peyton45be4502015-08-11 21:36:41 +0000537 // The time spent in barriers in each thread.
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000538 allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
539
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000540 // Time in explicit barriers.
541 allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
542
543 for (int c = 0; c<COUNTER_LAST; c++) {
544 if (counter::masterOnly(counter_e(c)) && t != 0)
545 continue;
546 allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue());
547 }
548 }
549
550 if (eventPrintingEnabled()) {
551 printPloticusFile();
552 fclose(eventsOut);
553 }
554
555 fprintf (statsOut, "Aggregate for all threads\n");
556 printStats (statsOut, &allStats[0], true);
557 fprintf (statsOut, "\n");
558 printStats (statsOut, &allCounters[0], false);
559
560 if (statsOut != stderr)
561 fclose(statsOut);
562
563}
564
565/* ************************************************** */
566/* ************* exported C functions ************** */
567
568// no name mangling for these functions, we want the c files to be able to get at these functions
569extern "C" {
570
571void __kmp_reset_stats()
572{
573 kmp_stats_list::iterator it;
574 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
575 timeStat * timers = (*it)->getTimers();
576 counter * counters = (*it)->getCounters();
577 explicitTimer * eTimers = (*it)->getExplicitTimers();
578
579 for (int t = 0; t<TIMER_LAST; t++)
580 timers[t].reset();
581
582 for (int c = 0; c<COUNTER_LAST; c++)
583 counters[c].reset();
584
585 for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
586 eTimers[t].reset();
587
588 // reset the event vector so all previous events are "erased"
589 (*it)->resetEventVector();
590
591 // May need to restart the explicit timers in thread zero?
592 }
593 KMP_START_EXPLICIT_TIMER(OMP_serial);
594 KMP_START_EXPLICIT_TIMER(OMP_start_end);
595}
596
597// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
598void __kmp_output_stats(const char * heading)
599{
600 __kmp_stats_global_output.outputStats(heading);
601 __kmp_reset_stats();
602}
603
604void __kmp_accumulate_stats_at_exit(void)
605{
606 // Only do this once.
607 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
608 return;
609
610 __kmp_output_stats("Statistics on exit");
611 return;
612}
613
614void __kmp_stats_init(void)
615{
616 return;
617}
618
619} // extern "C"
620