Leon Clarke | f7060e2 | 2010-06-03 12:02:55 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # Copyright 2010 the V8 project authors. All rights reserved. |
| 4 | # Redistribution and use in source and binary forms, with or without |
| 5 | # modification, are permitted provided that the following conditions are |
| 6 | # met: |
| 7 | # |
| 8 | # * Redistributions of source code must retain the above copyright |
| 9 | # notice, this list of conditions and the following disclaimer. |
| 10 | # * Redistributions in binary form must reproduce the above |
| 11 | # copyright notice, this list of conditions and the following |
| 12 | # disclaimer in the documentation and/or other materials provided |
| 13 | # with the distribution. |
| 14 | # * Neither the name of Google Inc. nor the names of its |
| 15 | # contributors may be used to endorse or promote products derived |
| 16 | # from this software without specific prior written permission. |
| 17 | # |
| 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | # |
| 30 | |
| 31 | # |
# This is a utility for plotting charts based on GC traces produced by V8 when
# run with flags --trace-gc --trace-gc-nvp. Relies on gnuplot for actual
# plotting.
| 35 | # |
| 36 | # Usage: gc-nvp-trace-processor.py <GC-trace-filename> |
| 37 | # |
| 38 | |
| 39 | |
| 40 | from __future__ import with_statement |
Iain Merrick | 7568138 | 2010-08-19 15:07:18 +0100 | [diff] [blame] | 41 | import sys, types, re, subprocess, math |
Leon Clarke | f7060e2 | 2010-06-03 12:02:55 +0100 | [diff] [blame] | 42 | |
def flatten(l):
  """Concatenate the elements of every iterable in `l` into one new list."""
  return [element for sublist in l for element in sublist]
| 47 | |
def split_nvp(s):
  """Parse every `name=value` pair found in `s` into a dictionary.

  A value that int() accepts is stored as an integer; any other value is
  kept as the matched string. When a name occurs more than once the last
  occurrence wins. Returns an empty dictionary if `s` holds no pair.
  """
  def convert(text):
    # Numeric values become ints, everything else stays textual.
    try:
      return int(text)
    except ValueError:
      return text

  pairs = re.findall(r"(\w+)=([-\w]+)", s)
  return dict((key, convert(raw)) for (key, raw) in pairs)
| 57 | |
def parse_gc_trace(input):
  """Read the trace file `input` and return its GC records as a list.

  Every line is parsed with split_nvp(). Only records that contain a
  'pause' value greater than zero are kept, and each kept record gets an
  extra key 'i' holding its zero-based position in the returned list.
  """
  records = []
  with open(input) as trace_file:
    for text in trace_file:
      record = split_nvp(text)
      if 'pause' not in record or not record['pause'] > 0:
        continue
      record['i'] = len(records)
      records.append(record)
  return records
| 67 | |
def extract_field_names(script):
  """Map every `$name` reference found in `script` to a sequential index.

  The result starts out with the entries 'data' and 'in' set to True, so
  `$data` and `$in` are never numbered. Every other name is numbered in
  order of first appearance.

  NOTE(review): the previous body could not run -- `true`, `field` and
  `field_count` were undefined, and the unescaped `$` in the pattern was an
  end-of-input anchor rather than a literal dollar sign. The first index is
  therefore an assumption: 1 is used so that the numbering agrees with
  collect_fields(), which also counts from 1. Confirm before relying on it.
  """
  fields = {'data': True, 'in': True}
  field_count = 1

  for m in re.finditer(r"\$(\w+)", script):
    field_name = m.group(1)
    if field_name not in fields:
      fields[field_name] = field_count
      field_count += 1

  return fields
| 78 | |
def gnuplot(script):
  """Feed `script` to a `gnuplot` child process and wait for it to finish.

  The script is written to the child's standard input, which is then closed
  before waiting for the process to exit.
  """
  process = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE)
  pipe = process.stdin
  pipe.write(script)
  pipe.close()
  process.wait()
| 84 | |
# Axis names an Item can be plotted against (see Item.axis).
(x1y1, x1y2, x2y1, x2y2) = ('x1y1', 'x1y2', 'x2y1', 'x2y2')
| 89 | |
class Item(object):
  """One series of a Plot: a title, the field(s) to read and an axis.

  `field` may be a single field reference or a list of them; it is always
  stored as a list. The keyword arguments 'style', 'lc' and 'fs' are
  emitted as the gnuplot clauses `with`, `lc rgb` and `fs`. Any other
  keyword is kept in `props` but not used by to_gnuplot().
  """

  def __init__(self, title, field, axis = x1y1, **keywords):
    self.title = title
    self.axis = axis
    self.props = keywords
    # isinstance() rather than comparing against types.ListType: it accepts
    # list subclasses too and does not rely on a name that exists only in
    # Python 2.
    if isinstance(field, list):
      self.field = field
    else:
      self.field = [field]

  def fieldrefs(self):
    """Return the list of field references this item reads."""
    return self.field

  def to_gnuplot(self, context):
    """Return this item's clause of a gnuplot `plot` command.

    `context` supplies the data file name and turns the field references
    into the column specification of the `using` clause.
    """
    args = ['"%s"' % context.datafile,
            'using %s' % context.format_fieldref(self.field),
            'title "%s"' % self.title,
            'axis %s' % self.axis]
    # Optional clauses, emitted in this fixed order when the property is set.
    optional_clauses = (('style', 'with %s'),
                        ('lc', 'lc rgb "%s"'),
                        ('fs', 'fs %s'))
    for (key, template) in optional_clauses:
      if key in self.props:
        args.append(template % self.props[key])
    return ' '.join(args)
| 115 | |
class Plot(object):
  """A gnuplot `plot` command built from one or more items."""

  def __init__(self, *items):
    self.items = items

  def fieldrefs(self):
    """Return the field references of all items as one flat list."""
    refs = []
    for item in self.items:
      refs.extend(item.fieldrefs())
    return refs

  def to_gnuplot(self, ctx):
    """Return the `plot` command with the item clauses joined by ', '."""
    clauses = [item.to_gnuplot(ctx) for item in self.items]
    return 'plot ' + ', '.join(clauses)
| 125 | |
class Set(object):
  """A gnuplot `set` command; it reads no data fields."""

  def __init__(self, value):
    self.value = value

  def fieldrefs(self):
    """Return an empty list: a `set` command references no fields."""
    return list()

  def to_gnuplot(self, ctx):
    """Return the command text, i.e. `value` prefixed with 'set '."""
    return ' '.join(('set', self.value))
| 135 | |
class Context(object):
  """Data shared while a script is generated: the data file name and the
  mapping from field to its index in that file."""

  def __init__(self, datafile, field_to_index):
    self.datafile = datafile
    self.field_to_index = field_to_index

  def format_fieldref(self, fieldref):
    """Return the indices of the fields in `fieldref`, joined by ':'."""
    lookup = self.field_to_index
    return ':'.join(str(lookup[name]) for name in fieldref)
| 143 | |
def collect_fields(plot):
  """Number the distinct field references used by the elements of `plot`.

  Returns a pair (fields, field_to_index). `fields` lists each distinct
  field reference in order of first appearance; `field_to_index` maps each
  one to its 1-based position in that list.
  """
  references = []
  for element in plot:
    references.extend(element.fieldrefs())

  ordered = []
  positions = {}
  for ref in references:
    if ref in positions:
      continue
    ordered.append(ref)
    positions[ref] = len(ordered)

  return (ordered, positions)
| 157 | |
def is_y2_used(plot):
  """Return True if any item of any Plot in `plot` uses axis x1y2 or x2y2.

  Elements of `plot` that are not Plot instances are ignored.
  """
  for element in plot:
    if not isinstance(element, Plot):
      continue
    if any(item.axis == x1y2 or item.axis == x2y2 for item in element.items):
      return True
  return False
| 165 | |
def get_field(trace_line, field):
  """Return the value of `field` for the record `trace_line`.

  A string `field` is used as a key into `trace_line`. A callable `field`
  is called with `trace_line` and its result is returned. Any other kind of
  `field` yields None.
  """
  # isinstance()/callable() replace the exact-type checks against
  # types.StringType and types.FunctionType: those names exist only in
  # Python 2, and the FunctionType check rejected callables that are not
  # plain functions (builtins, functools.partial objects, bound methods).
  if isinstance(field, str):
    return trace_line[field]
  if callable(field):
    return field(trace_line)
  return None
| 172 | |
def generate_datafile(datafile_name, trace, fields):
  """Write `trace` to the file `datafile_name`, one line per record.

  Each line holds the value of every entry of `fields` for that record (as
  returned by get_field() and converted with str()), separated by tabs.
  """
  with open(datafile_name, 'w') as out:
    for record in trace:
      cells = [str(get_field(record, field)) for field in fields]
      out.write('\t'.join(cells) + '\n')
| 179 | |
def generate_script_and_datafile(plot, trace, datafile, output):
  """Write the data file for `plot` and return the matching gnuplot script.

  The fields referenced by `plot` are collected and written to `datafile`
  for every record of `trace`. The returned script selects PNG output to
  `output`, enables the y2 axis when one of the items uses it, and then
  lists the command of each element of `plot`, one per line.
  """
  fields, field_to_index = collect_fields(plot)
  generate_datafile(datafile, trace, fields)

  preamble = [
    'set terminal png',
    'set output "%s"' % output,
    'set autoscale',
    'set ytics nomirror',
    'set xtics nomirror',
    'set key below'
  ]

  # The second y axis is only configured when some item is plotted on it.
  if is_y2_used(plot):
    y2_setup = ['set autoscale y2', 'set y2tics']
  else:
    y2_setup = []

  context = Context(datafile, field_to_index)
  commands = [element.to_gnuplot(context) for element in plot]

  return '\n'.join(preamble + y2_setup + commands)
| 202 | |
def plot_all(plots, trace, prefix):
  """Render every entry of `plots` to a PNG and return the file names.

  Chart number N is written to `<prefix>_<N>.png`, counting from 0. All
  charts share the intermediate data file '~datafile', which is rewritten
  for each chart before gnuplot runs.
  """
  charts = []

  for (index, plot) in enumerate(plots):
    outfilename = "%s_%d.png" % (prefix, index)
    charts.append(outfilename)
    script = generate_script_and_datafile(plot, trace, '~datafile', outfilename)
    print('Plotting %s...' % outfilename)
    gnuplot(script)

  return charts
| 214 | |
def reclaimed_bytes(row):
  """Return 'total_size_before' minus 'total_size_after' of `row`."""
  before = row['total_size_before']
  after = row['total_size_after']
  return before - after
| 217 | |
def other_scope(r):
  """Return the part of the pause not covered by mark, sweep or external.

  Records whose 'gc' is 's' (scavenges) always yield 0.
  """
  if r['gc'] != 's':
    remainder = r['pause']
    for phase in ('mark', 'sweep', 'external'):
      remainder = remainder - r[phase]
    return remainder
  # there is no 'other' scope for scavenging collections.
  return 0
Ben Murdoch | 257744e | 2011-11-30 15:57:28 +0000 | [diff] [blame] | 223 | |
def scavenge_scope(r):
  """Return 'pause' minus 'external' for a scavenge record, else 0.

  A record is a scavenge when its 'gc' value is 's'.
  """
  is_scavenge = r['gc'] == 's'
  return r['pause'] - r['external'] if is_scavenge else 0
Kristian Monsen | 50ef84f | 2010-07-29 15:18:00 +0100 | [diff] [blame] | 228 | |
Ben Murdoch | 3ef787d | 2012-04-12 10:51:47 +0100 | [diff] [blame^] | 229 | |
def real_mutator(r):
  """Return 'mutator' minus 'stepstook' of the record `r`."""
  mutator = r['mutator']
  return mutator - r['stepstook']
| 232 | |
# Chart definitions, one inner list per output image. Each inner list holds
# the Set commands and the Plot that generate_script_and_datafile() turns
# into a gnuplot script.
plots = [
  # Stacked pause-time breakdown, plus the 'IGC Steps' series.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
         Item('Marking', 'mark', lc = 'purple'),
         Item('Sweep', 'sweep', lc = 'blue'),
         Item('External', 'external', lc = '#489D43'),
         Item('Other', other_scope, lc = 'grey'),
         Item('IGC Steps', 'stepstook', lc = '#FF6347'))
  ],
  # Stacked pause-time breakdown.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
         Item('Marking', 'mark', lc = 'purple'),
         Item('Sweep', 'sweep', lc = 'blue'),
         Item('External', 'external', lc = '#489D43'),
         Item('Other', other_scope, lc = '#ADD8E6'),
         # NOTE(review): 'external' is already part of this stack (see the
         # '#489D43' series above), so this entry adds it to every bar a
         # second time. Looks unintended -- confirm before removing.
         Item('External', 'external', lc = '#D3D3D3'))
  ],

  # Mutator time with 'stepstook' subtracted (see real_mutator).
  [
    Plot(Item('Mutator', real_mutator, lc = 'black', style = 'lines'))
  ],
  # Heap size and holes before each GC (y2 axis) against the pause.
  [
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Heap Size (before GC)', 'total_size_before', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'green'),
         # Title corrected: this series plots holes_size_before, but was
         # labelled '(after GC)' like the series of the next chart.
         Item('Total holes (before GC)', 'holes_size_before', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'red'),
         Item('GC Time', ['i', 'pause'], style = 'lines', lc = 'red'))
  ],
  # Heap size and holes after each GC (y2 axis) against the pause.
  [
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Heap Size (after GC)', 'total_size_after', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'green'),
         Item('Total holes (after GC)', 'holes_size_after', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'red'),
         Item('GC Time', ['i', 'pause'],
              style = 'lines',
              lc = 'red'))
  ],
  # Allocated, reclaimed and promoted amounts per GC.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style data histograms'),
    Plot(Item('Allocated', 'allocated'),
         Item('Reclaimed', reclaimed_bytes),
         Item('Promoted', 'promoted', style = 'lines', lc = 'black'))
  ],
]
| 292 | |
def freduce(f, field, trace, init):
  """Fold `f` over the `field` value of every record in `trace`.

  Starts from `init` and, for each record in order, replaces the
  accumulator with f(accumulator, record[field]). Returns the final
  accumulator, which is `init` for an empty trace.
  """
  accumulator = init
  for record in trace:
    accumulator = f(accumulator, record[field])
  return accumulator
| 295 | |
def calc_total(trace, field):
  """Return the integer sum of `field` over all records of `trace`.

  Each value is converted with int() before it is added, so numeric strings
  are accepted. Returns 0 for an empty trace.
  """
  # int() instead of long(): `long` exists only in Python 2, where int
  # arithmetic already switches to arbitrary precision when it overflows.
  return sum(int(record[field]) for record in trace)
Kristian Monsen | 50ef84f | 2010-07-29 15:18:00 +0100 | [diff] [blame] | 298 | |
def calc_max(trace, field):
  """Return the largest `field` value in `trace`, but never less than 0.

  The search starts from 0, so an empty trace yields 0.
  """
  largest = 0
  for record in trace:
    largest = max(largest, record[field])
  return largest
| 301 | |
def count_nonzero(trace, field):
  """Return how many records of `trace` have a `field` value other than 0."""
  return sum(0 if record[field] == 0 else 1 for record in trace)
| 304 | |
Kristian Monsen | 50ef84f | 2010-07-29 15:18:00 +0100 | [diff] [blame] | 305 | |
def process_trace(filename):
  """Turn the GC trace in `filename` into charts and an HTML summary.

  The trace is parsed with parse_gc_trace(), every entry of `plots` is
  rendered by plot_all() using `filename` as the chart name prefix, and
  `<filename>.html` is written. The page holds a statistics table,
  throughput lines and one <img> tag per chart.
  """
  trace = parse_gc_trace(filename)

  # List comprehensions rather than filter(): the results are handed to
  # len() and iterated several times, which requires real lists.
  marksweeps = [r for r in trace if r['gc'] == 'ms']
  scavenges = [r for r in trace if r['gc'] == 's']
  globalgcs = [r for r in trace if r['gc'] != 's']

  charts = plot_all(plots, trace, filename)

  def stats(out, prefix, rows, field):
    """Write one table row for `field` over `rows`: the count, total,
    maximum, average and sample standard deviation."""
    n = len(rows)
    total = calc_total(rows, field)
    largest = calc_max(rows, field)
    # The mean is kept as a float so that the deviation is not computed
    # from a truncated average; it is still printed with %d below.
    if n > 0:
      avg = float(total) / n
    else:
      avg = 0
    if n > 1:
      squares = sum((r[field] - avg) ** 2 for r in rows)
      dev = math.sqrt(squares / (n - 1))
    else:
      dev = 0

    out.write('<tr><td>%s</td><td>%d</td><td>%d</td>'
              '<td>%d</td><td>%d [dev %f]</td></tr>' %
              (prefix, n, total, largest, avg, dev))

  def HumanReadable(size):
    """Format `size` with one decimal and a bytes/kB/MB/GB suffix, using
    steps of 1024."""
    suffixes = ['bytes', 'kB', 'MB', 'GB']
    power = 1
    for suffix in suffixes[:-1]:
      if size < power * 1024:
        return "%.1f %s" % (float(size) / power, suffix)
      power *= 1024
    # Anything that does not fit a smaller unit is reported in GB. The
    # previous loop returned None for sizes of 1024 GB or more.
    return "%.1f %s" % (float(size) / power, suffixes[-1])

  def throughput(out, name, rows):
    """Write the two throughput lines for `rows`: total size after and
    before GC, each divided by the total pause. Writes nothing when the
    total pause is 0."""
    total_live_after = calc_total(rows, 'total_size_after')
    total_live_before = calc_total(rows, 'total_size_before')
    total_gc = calc_total(rows, 'pause')
    if total_gc == 0:
      return
    # Explicit float division so the rate is not floored.
    out.write('GC %s Throughput (after): %s / %s ms = %s/ms<br/>' %
              (name,
               HumanReadable(total_live_after),
               total_gc,
               HumanReadable(float(total_live_after) / total_gc)))
    out.write('GC %s Throughput (before): %s / %s ms = %s/ms<br/>' %
              (name,
               HumanReadable(total_live_before),
               total_gc,
               HumanReadable(float(total_live_before) / total_gc)))

  with open(filename + '.html', 'w') as out:
    out.write('<html><body>')
    out.write('<table>')
    out.write('<tr><td>Phase</td><td>Count</td><td>Time (ms)</td>')
    out.write('<td>Max</td><td>Avg</td></tr>')
    stats(out, 'Total in GC', trace, 'pause')
    stats(out, 'Scavenge', scavenges, 'pause')
    stats(out, 'MarkSweep', marksweeps, 'pause')
    # The per-phase rows only count the records in which the phase ran.
    stats(out, 'Mark', [r for r in trace if r['mark'] != 0], 'mark')
    stats(out, 'Sweep', [r for r in trace if r['sweep'] != 0], 'sweep')
    stats(out,
          'External',
          [r for r in trace if r['external'] != 0],
          'external')
    out.write('</table>')
    throughput(out, 'TOTAL', trace)
    throughput(out, 'MS', marksweeps)
    throughput(out, 'OLDSPACE', globalgcs)
    out.write('<br/>')
    for chart in charts:
      out.write('<img src="%s">' % chart)
    out.write('</body></html>')

  print("%s generated." % (filename + '.html'))
| 384 | |
# Script entry point: exactly one argument, the GC trace file, is expected.
if len(sys.argv) == 2:
  process_trace(sys.argv[1])
else:
  print("Usage: %s <GC-trace-filename>" % sys.argv[0])
  sys.exit(1)