Jonathan Peyton | b6b79ac | 2018-03-26 18:44:48 +0000 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | |
| 3 | import pandas as pd |
| 4 | import numpy as np |
| 5 | import re |
| 6 | import sys |
| 7 | import os |
| 8 | import argparse |
| 9 | import matplotlib |
| 10 | from matplotlib import pyplot as plt |
| 11 | from matplotlib.projections.polar import PolarAxes |
| 12 | from matplotlib.projections import register_projection |
| 13 | |
| 14 | """ |
| 15 | Read the stats file produced by the OpenMP runtime |
| 16 | and produce a processed summary |
| 17 | |
| 18 | The radar_factory original code was taken from |
| 19 | matplotlib.org/examples/api/radar_chart.html |
| 20 | We added support to handle negative values for radar charts |
| 21 | """ |
| 22 | |
def radar_factory(num_vars, frame='circle'):
    """Register the 'radar' projection and return its axis angles.

    Args:
        num_vars: number of axes (spokes) of the radar chart.
        frame: shape of the surrounding frame, 'circle' or 'polygon'.

    Returns:
        Array of num_vars evenly spaced angles in radians, starting at 0.

    Raises:
        ValueError: if frame is not one of the supported shapes.
    """
    # calculate evenly-spaced axis angles
    theta = 2*np.pi * np.linspace(0, 1-1./num_vars, num_vars)
    # Rotating theta by pi/2 (first axis at the top) is deliberately not done.

    def draw_poly_frame(self, x0, y0, r):
        # TODO: use transforms to convert (x, y) to (r, theta)
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_frame(self, x0, y0, r):
        return plt.Circle((x0, y0), r)

    frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
    if frame not in frame_dict:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('unknown value for `frame`: %s' % frame)

    class RadarAxes(PolarAxes):
        """
        Class for creating a radar chart (a.k.a. a spider or star chart)

        http://en.wikipedia.org/wiki/Radar_chart
        """
        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # define draw_frame method
        draw_frame = frame_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Forward to PolarAxes.plot and hand back the lines it created.

            The lines are not closed here; callers get them exactly as the
            parent class drew them.
            """
            return super(RadarAxes, self).plot(*args, **kwargs)

        def set_varlabels(self, labels):
            """Label each axis; theta is converted from radians to degrees."""
            self.set_thetagrids(theta * 180/np.pi, labels, fontsize=14)

        def _gen_axes_patch(self):
            """Build the frame patch centred at (0.5, 0.5) with radius 0.5."""
            x0, y0 = (0.5, 0.5)
            r = 0.5
            return self.draw_frame(x0, y0, r)

    register_projection(RadarAxes)
    return theta
| 75 | |
| 76 | # Code to read the raw stats |
def extractSI(s):
    """Turn text such as '1.5 k' into the number it denotes (1500.0).

    The text is split on whitespace: the first token is the number and,
    when there are exactly two tokens, the first character of the second is
    taken as an SI prefix.  Any other token count means "no prefix".
    An unknown prefix raises KeyError.
    """
    # http://physics.nist.gov/cuu/Units/prefixes.html
    # Prefixes that scale the value up are applied with one multiplication.
    multipliers = {'Y': 1e24, 'Z': 1e21, 'E': 1e18, 'P': 1e15, 'T': 1e12,
                   'G': 1e9, 'M': 1e6, 'k': 1e3, ' ': 1}
    # Prefixes that scale the value down are applied with one division
    # (instead of multiplying by an inexact 1/x) to avoid a second rounding.
    divisors = {'m': 1e3, 'u': 1e6, 'n': 1e9, 'p': 1e12, 'f': 1e15,
                'a': 1e18, 'z': 1e21, 'y': 1e24}

    tokens = s.split()
    value = float(tokens[0])
    suffix = ' '
    if len(tokens) == 2:
        suffix = tokens[1]
    prefix = suffix[0]
    if prefix in multipliers:
        return value * multipliers[prefix]
    return value / divisors[prefix]
| 106 | |
def readData(f):
    """Parse one comma-separated table from the open stats file f.

    The first line read is the header; its first field names the row-label
    column and the remaining fields become the DataFrame columns.  Each
    following line contributes one row, until a blank line or end of file.
    Lines starting with '#' are ignored.  Every value is converted with
    extractSI.

    Returns:
        A DataFrame indexed by the first field of each row.  It is empty
        when the table has no data rows.
    """
    header = f.readline()
    fieldnames = [x.strip() for x in header.split(',')]
    names = []
    rows = []
    line = f.readline().strip()
    while line != "":
        if line[0] != '#':
            fields = line.split(',')
            names.append(fields[0].strip())
            rows.append([extractSI(v) for v in fields[1:]])
        line = f.readline().strip()
    # Plain constructor instead of DataFrame.from_items, which newer pandas
    # no longer provides; row order and duplicate row labels are preserved.
    return pd.DataFrame(rows, index=names, columns=fieldnames[1:])
| 122 | |
def readTimers(f):
    """Skip the preamble of the stats file f, then parse the timer table.

    Leading lines that start with '#' are discarded.  If the first line
    after them is one of the known section headings, one further line is
    consumed.  The stream is then handed to readData, which treats the next
    line it reads as the column header.
    """
    headings = ("Statistics on exit", "Aggregate for all threads")
    line = f.readline()
    # startswith() is False for the empty string returned at end of file,
    # so a file consisting only of comments no longer raises IndexError.
    while line.startswith('#'):
        line = f.readline()
    # Compare the stripped text against both headings explicitly.
    if line.strip() in headings:
        f.readline()
    return readData(f)
| 132 | |
def readCounters(f):
    """Parse the counter table; it has the same layout readData handles."""
    counters = readData(f)
    return counters
| 136 | |
def readFile(fname):
    """Read the statistics from the file named fname.

    Returns:
        A dict with keys "timers" and "counters", or None (after printing
        a message) when an OSError/IOError occurs while opening or reading.
    """
    try:
        with open(fname) as f:
            timers = readTimers(f)
            counters = readCounters(f)
    except (OSError, IOError):
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Cannot open " + fname)
        return None
    return {"timers": timers, "counters": counters}
| 148 | |
def usefulValues(l):
    """Flag, element by element, the values that are neither null nor zero."""
    not_null = pd.notnull(l)
    not_zero = l != 0.0
    flags = []
    for present, nonzero in zip(not_null, not_zero):
        flags.append(present and nonzero)
    return flags
| 152 | |
def uselessValues(l):
    """Flag, element by element, the values that are null or zero."""
    inverted = []
    for useful in usefulValues(l):
        inverted.append(not useful)
    return inverted
| 156 | |
# Names of the statistics sections handled by this script.
interestingStats = ("counters", "timers")

# Per-section chart text: (y-axis label, chart title).
statProperties = {
    "counters": ("Count", "Counter Statistics"),
    "timers": ("Time (ticks)", "Timer Statistics"),
}
| 161 | |
def drawChart(data, kind, filebase):
    """Save a log-scale bar chart of data["Mean"] with data["SD"] error bars.

    kind selects the axis label and title from statProperties; the figure
    is written to filebase + "_" + kind.
    """
    means = data["Mean"]
    deviations = data["SD"]
    means.plot(kind="bar", logy=True, grid=True, colormap="GnBu",
               yerr=deviations, ecolor="black")
    plt.xlabel("OMP Constructs")
    y_label = statProperties[kind][0]
    plt.ylabel(y_label)
    chart_title = statProperties[kind][1]
    plt.title(chart_title)
    plt.tight_layout()
    target = filebase + "_" + kind
    plt.savefig(target)
| 171 | |
def normalizeValues(data, countField, factor):
    """Divide every column of data except countField by factor, in place."""
    scaled_columns = []
    for column in data.keys():
        if column != countField:
            scaled_columns.append(column)
    data[scaled_columns] /= factor
| 175 | |
| 176 | |
def setRadarFigure(titles):
    """Create a 9x9 radar figure with one labelled axis per entry of titles.

    Returns a dict holding the new axes under 'ax' and the axis angles
    under 'theta'.
    """
    figure = plt.figure(figsize=(9,9))
    matplotlib.rcParams.update({'font.size':13})
    angles = radar_factory(len(titles))
    radar_axes = figure.add_axes([0.1, 0.1, 0.8, 0.8], projection='radar')
    radar_axes.set_rgrids([0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10])
    radar_axes.set_varlabels(titles)
    # Caption placed at radius 1 along the third axis.
    radar_axes.text(angles[2], 1, "Linear->Log",
                    horizontalalignment='center', color='green', fontsize=18)
    return dict(ax=radar_axes, theta=angles)
| 189 | |
| 190 | |
def drawRadarChart(data, kind, filebase, params, color):
    """Plot data on the radar axes in params as two traces.

    Values >= 1 go on the blue trace as log10(value); all other values go
    unchanged on the red trace.  Each trace is zero wherever the other one
    carries the value.  The color argument is accepted but not used.
    """
    linear_part = data * 0
    log_part = data * 0
    for key in data.keys():
        if not data[key] >= 1:
            linear_part[key] = data[key]
        else:
            log_part[key] = np.log10(data[key])
    label_base = filebase + "_" + kind
    params['ax'].plot(params['theta'], log_part, color='b',
                      label=label_base + "_log")
    params['ax'].plot(params['theta'], linear_part, color='r',
                      label=label_base + "_linear")
    params['ax'].legend(loc='best', bbox_to_anchor=(1.4,1.2))
    # Radial range runs from 0 to the largest log value, rounded up.
    upper = np.ceil(max(log_part))
    params['ax'].set_rlim((0, upper))
| 204 | |
def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
    """Apply log scale, legend, tick labels and titles to a grouped bar chart.

    Tick labels are the keys of tmp[s]['Total']; the axis label and title
    come from statProperties[s].
    """
    ax.set_yscale('log')
    ax.legend()
    tick_positions = index + width * n / 2
    ax.set_xticks(tick_positions)
    tick_labels = tmp[s]['Total'].keys()
    ax.set_xticklabels(tick_labels, rotation=50, horizontalalignment='right')
    plt.xlabel("OMP Constructs")
    y_label = statProperties[s][0]
    plt.ylabel(y_label)
    heading = statProperties[s][1]
    plt.title(heading)
    plt.tight_layout()
| 214 | |
def derivedTimerStats(data):
    """Express each timer as a percentage of 'OMP_worker_thread_life'.

    Args:
        data: mapping (dict or pandas Series) from timer name to value.

    Returns:
        dict mapping timer name to 100 * value / OMP_worker_thread_life,
        for the keys that precede the first iteration/argument statistic.

    Raises:
        KeyError: if a percentage is needed but 'OMP_worker_thread_life'
            is absent from data.
    """
    lifeKey = 'OMP_worker_thread_life'
    stopKeys = ('FOR_static_iterations', 'OMP_PARALLEL_args',
                'OMP_set_numthreads', 'FOR_dynamic_iterations')
    keys = list(data.keys())
    # Look the total up once, so the result no longer depends on
    # 'OMP_worker_thread_life' being the first key.
    totalRuntime = data[lifeKey] if lifeKey in keys else None
    stats = {}
    for key in keys:
        if key == lifeKey:
            continue
        if key in stopKeys:
            # NOTE(review): this stops at the first such key, so every key
            # after it is dropped as well -- confirm this is intended
            # rather than skipping only these four.
            break
        if totalRuntime is None:
            raise KeyError(lifeKey)
        stats[key] = 100 * data[key] / totalRuntime
    return stats
| 226 | |
def compPie(data):
    """Split data into compute and non-compute entries.

    A key counts as compute when it is one of the names listed below;
    every other key is non-compute.  Both groups are printed.

    Returns:
        [compKeys, nonCompKeys], two dicts that together hold every entry
        of data.
    """
    computeNames = frozenset((
        'OMP_critical', 'OMP_single', 'OMP_serial',
        'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
        'OMP_task_taskwait', 'OMP_task_taskyield', 'OMP_task_taskgroup',
        'OMP_task_join_bar', 'OMP_task_plain_bar',
    ))
    compKeys = {}
    nonCompKeys = {}
    for key in data.keys():
        target = compKeys if key in computeNames else nonCompKeys
        target[key] = data[key]
    # One pre-formatted argument keeps the output identical on Python 2 and 3.
    print("comp keys: %s \n\n non comp keys: %s" % (compKeys, nonCompKeys))
    return [compKeys, nonCompKeys]
| 240 | |
def drawMainPie(data, filebase, colors):
    """Save a two-wedge pie of compute versus non-compute totals.

    data is a pair of dicts; the values of each are summed to size its
    wedge.  The figure is written to filebase + "_main_pie".
    """
    compute_total = sum(data[0].values())
    other_total = sum(data[1].values())
    sizes = [compute_total, other_total]
    labels = ["Compute - %.2f" % compute_total,
              "Non Compute - %.2f" % other_total]
    no_explode = [0, 0]
    wedges = plt.pie(sizes, no_explode, colors=colors, startangle=90)
    plt.title("Time Division")
    plt.axis('equal')
    plt.legend(wedges[0], labels, loc='best', bbox_to_anchor=(-0.1,1),
               fontsize=16)
    plt.savefig(filebase + "_main_pie", bbox_inches='tight')
| 250 | |
def drawSubPie(data, tag, filebase, colors):
    """Save a pie chart with one wedge per entry of data.

    Each legend label is the key followed by that entry's share of the sum
    of all values, in percent.  The title shows tag and the sum itself.
    The figure is written to filebase + "_" + tag.
    """
    # Materialise keys and values: on Python 3 they are views, which can be
    # neither indexed nor assigned into.
    names = list(data.keys())
    sizes = list(data.values())
    total = sum(sizes)
    explode = [0] * len(sizes)
    # 100.0 forces true division on Python 2 even for integer values.
    labels = [name + " - %.2f" % (100.0 * size / total)
              for name, size in zip(names, sizes)]
    patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
    plt.title(tag + "(Percentage of Total: %.2f)" % total)
    plt.tight_layout()
    plt.axis('equal')
    plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1),
               fontsize=16)
    plt.savefig(filebase+"_"+tag, bbox_inches='tight')
| 267 | |
def main():
    """Parse the command line and produce the charts for every stats file.

    For each section in interestingStats and each input file:
      * counters: normalise the counter table and save a radar chart;
      * timers: derive percentages of worker-thread lifetime, save the
        main pie and the two sub-pies, and write derivedStats_<base>.csv.
    Files that readFile could not read are skipped.
    """
    parser = argparse.ArgumentParser(description='''This script takes a list
    of files containing each of which contain output from a stats-gathering
    enabled OpenMP runtime library. Each stats file is read, parsed, and
    used to produce a summary of the statistics''')
    parser.add_argument('files', nargs='+',
                        help='files to parse which contain stats-gathering output')
    command_args = parser.parse_args()
    # NOTE(review): 'lightsage' may not be a named color in newer matplotlib
    # releases -- confirm against the installed version.
    colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
              'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
              'olive']
    stats = {}
    matplotlib.rcParams.update({'font.size':22})
    for s in interestingStats:
        plt.subplots()
        n = 0

        for f in command_args.files:
            filebase = os.path.splitext(f)[0]
            tmp = readFile(f)
            if tmp is None:
                # readFile has already reported the problem; skip this file
                # instead of failing on the subscript below.
                continue
            data = tmp[s]['Total']
            if s == 'counters':
                elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
                normalizeValues(tmp["counters"], "SampleCount",
                                elapsedTime / 1.e9)
                # Plotting radar charts
                params = setRadarFigure(data.keys())
                chartType = "radar"
                # Wrap around the palette when there are more files than colors.
                drawRadarChart(data, s, filebase, params,
                               colors[n % len(colors)])
                # radar charts finish here
                plt.savefig(filebase+"_"+s+"_"+chartType, bbox_inches='tight')
            elif s == 'timers':
                print("overheads in "+filebase)
                # Prevent repetition by removing rows similar to
                # Total_OMP_work, as Total_OMP_work['Total'] is the same as
                # OMP_work['Total'].  Iterate over a snapshot of the keys
                # because entries are deleted inside the loop.
                for key in list(data.keys()):
                    if key[0:5] == 'Total':
                        del data[key]
                stats[filebase] = derivedTimerStats(data)
                dataSubSet = compPie(stats[filebase])
                drawMainPie(dataSubSet, filebase, colors)
                plt.figure(0)
                drawSubPie(dataSubSet[0], "Computational Time", filebase, colors)
                plt.figure(1)
                drawSubPie(dataSubSet[1], "Non Computational Time", filebase, colors)
                with open('derivedStats_{}.csv'.format(filebase), 'w') as out:
                    out.write('================={}====================\n'.format(filebase))
                    # list(): dict.items() is a view on Python 3.
                    out.write(pd.DataFrame(list(stats[filebase].items())).to_csv()+'\n')
            n += 1
    plt.close()


if __name__ == "__main__":
    main()