Leon Clarke | f7060e2 | 2010-06-03 12:02:55 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # Copyright 2010 the V8 project authors. All rights reserved. |
| 4 | # Redistribution and use in source and binary forms, with or without |
| 5 | # modification, are permitted provided that the following conditions are |
| 6 | # met: |
| 7 | # |
| 8 | # * Redistributions of source code must retain the above copyright |
| 9 | # notice, this list of conditions and the following disclaimer. |
| 10 | # * Redistributions in binary form must reproduce the above |
| 11 | # copyright notice, this list of conditions and the following |
| 12 | # disclaimer in the documentation and/or other materials provided |
| 13 | # with the distribution. |
| 14 | # * Neither the name of Google Inc. nor the names of its |
| 15 | # contributors may be used to endorse or promote products derived |
| 16 | # from this software without specific prior written permission. |
| 17 | # |
| 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | # |
| 30 | |
| 31 | # |
| 32 | # This is a utility for plotting charts based on GC traces produced by V8 when |
| 33 | # run with flags --trace-gc --trace-gc-nvp. Relies on gnuplot for actual |
| 34 | # plotting. |
| 35 | # |
| 36 | # Usage: gc-nvp-trace-processor.py <GC-trace-filename> |
| 37 | # |
| 38 | |
| 39 | |
| 40 | from __future__ import with_statement |
| 41 | import sys, types, re, subprocess |
| 42 | |
def flatten(l):
    """Return one new list holding the elements of every iterable in `l`.

    Only a single level of nesting is removed, and the elements keep the
    order in which they are encountered.
    """
    return [element for group in l for element in group]
| 47 | |
def split_nvp(s):
    """Parse every `name=value` pair found in the string `s` into a dict.

    A value that int() accepts is stored as an integer; any other value is
    kept as the matched text. When a name occurs more than once the last
    occurrence wins.
    """
    pairs = {}
    for match in re.finditer(r"(\w+)=([-\w]+)", s):
        key, raw = match.groups()
        try:
            parsed = int(raw)
        except ValueError:
            # Not a number (e.g. a GC type tag): keep the text as-is.
            parsed = raw
        pairs[key] = parsed
    return pairs
| 57 | |
def parse_gc_trace(input):
    """Read the trace file named by `input` and return its usable records.

    Every line is parsed with split_nvp(). Only records that contain a
    'pause' entry greater than zero are kept, and each kept record gets an
    'i' entry holding its position in the returned list.
    """
    records = []
    with open(input) as trace_file:
        for text in trace_file:
            record = split_nvp(text)
            if 'pause' not in record:
                continue
            if record['pause'] > 0:
                record['i'] = len(records)
                records.append(record)
    return records
| 67 | |
def extract_field_names(script):
    """Number the `$name` references that appear in `script`.

    Returns a dict that maps each distinct referenced name to a number
    assigned in order of first appearance. 'data' and 'in' are pre-seeded
    (mapped to True), so references to them are never numbered.
    """
    fields = {'data': True, 'in': True}
    # NOTE(review): the counter was never initialised in the original code;
    # 1 is assumed because collect_fields() also numbers fields from 1 --
    # confirm before relying on the exact values.
    field_count = 1

    # The `$` must be escaped: unescaped it is an end-of-string anchor and
    # the pattern can never match a `$name` reference.
    for m in re.finditer(r"\$(\w+)", script):
        field_name = m.group(1)
        if field_name not in fields:
            fields[field_name] = field_count
            field_count += 1

    return fields
| 78 | |
def gnuplot(script):
    """Run the `gnuplot` executable and feed it `script` on standard input.

    Blocks until gnuplot exits. Raises OSError if the executable cannot be
    started.
    """
    # universal_newlines=True makes the stdin pipe accept text on Python 3;
    # on Python 2 the flag does not affect stdin, so `str` is written as-is.
    process = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE,
                               universal_newlines=True)
    try:
        try:
            process.stdin.write(script)
        finally:
            # Always signal end-of-input, even if the write failed.
            process.stdin.close()
    finally:
        # Always reap the child so no zombie process is left behind.
        process.wait()
| 84 | |
# Axis-pair identifiers. Item stores one of these as its `axis` and emits it
# after the `axis` keyword of the generated gnuplot plot clause.
x1y1, x1y2, x2y1, x2y2 = 'x1y1', 'x1y2', 'x2y1', 'x2y2'
| 89 | |
class Item(object):
    """One data series inside a gnuplot `plot` command."""

    def __init__(self, title, field, axis=x1y1, **keywords):
        """Describe a series.

        title    -- legend text for the series.
        field    -- a single field reference, or a list of them when the
                    series uses several columns.
        axis     -- axis pair the series is drawn against.
        keywords -- optional rendering properties; 'style', 'lc' and 'fs'
                    are the ones to_gnuplot() understands.
        """
        self.title = title
        self.axis = axis
        self.props = keywords
        # isinstance() also accepts list subclasses and, unlike
        # types.ListType, is available on every Python version.
        if isinstance(field, list):
            self.field = field
        else:
            self.field = [field]

    def fieldrefs(self):
        """Return the list of field references this series needs."""
        return self.field

    def to_gnuplot(self, context):
        """Return the plot clause for this series.

        `context` supplies the data file name (`datafile`) and turns field
        references into a column spec (`format_fieldref`).
        """
        args = ['"%s"' % context.datafile,
                'using %s' % context.format_fieldref(self.field),
                'title "%s"' % self.title,
                'axis %s' % self.axis]
        # Optional properties, emitted in this fixed order when present.
        optional = (('style', 'with %s'),
                    ('lc', 'lc rgb "%s"'),
                    ('fs', 'fs %s'))
        for key, template in optional:
            if key in self.props:
                args.append(template % self.props[key])
        return ' '.join(args)
| 115 | |
class Plot(object):
    """A gnuplot `plot` command made up of the series passed to it."""

    def __init__(self, *items):
        # Kept as the tuple of positional arguments.
        self.items = items

    def fieldrefs(self):
        """Return the field references of all series, in order."""
        refs = []
        for series in self.items:
            refs.extend(series.fieldrefs())
        return refs

    def to_gnuplot(self, ctx):
        """Return the `plot` command with the series clauses comma-separated."""
        clauses = [series.to_gnuplot(ctx) for series in self.items]
        return 'plot %s' % ', '.join(clauses)
| 125 | |
class Set(object):
    """A gnuplot `set` command; it uses no data fields."""

    def __init__(self, value):
        # Text that follows the `set` keyword.
        self.value = value

    def fieldrefs(self):
        """Return an empty list: a `set` command reads no columns."""
        return []

    def to_gnuplot(self, ctx):
        """Return the command text; `ctx` is accepted but not used."""
        return ''.join(['set ', self.value])
| 135 | |
class Context(object):
    """Data file name plus the field-to-column mapping used when rendering."""

    def __init__(self, datafile, field_to_index):
        self.datafile = datafile
        self.field_to_index = field_to_index

    def format_fieldref(self, fieldref):
        """Return the column indices of `fieldref` joined with ':'.

        Raises KeyError for a field missing from the mapping.
        """
        columns = []
        for field in fieldref:
            columns.append(str(self.field_to_index[field]))
        return ':'.join(columns)
| 143 | |
def collect_fields(plot):
    """Assign a column number to every distinct field used by `plot`.

    `plot` is an iterable of objects with a fieldrefs() method. Returns
    (fields, field_to_index): the distinct fields in order of first use,
    and a dict mapping each field to its 1-based position in that list.
    """
    ordered = []
    index_of = {}

    for item in plot:
        for field in item.fieldrefs():
            if field in index_of:
                continue
            ordered.append(field)
            # The length after appending is the 1-based column number.
            index_of[field] = len(ordered)

    return (ordered, index_of)
| 157 | |
def is_y2_used(plot):
    """Return True if any series of any Plot in `plot` uses a y2 axis pair.

    Entries of `plot` that are not Plot instances are ignored.
    """
    return any(
        item.axis == x1y2 or item.axis == x2y2
        for subplot in plot
        if isinstance(subplot, Plot)
        for item in subplot.items)
| 165 | |
def get_field(trace_line, field):
    """Return the value of `field` for one trace record.

    A string `field` is looked up as a key of `trace_line` (KeyError if it
    is missing). A callable `field` is called with `trace_line` and its
    result returned. Any other kind of `field` yields None.
    """
    # isinstance()/callable() replace the Python-2-only types.StringType and
    # the exact-type FunctionType check, so str subclasses and callables
    # that are not plain functions (e.g. functools.partial) work too.
    if isinstance(field, str):
        return trace_line[field]
    if callable(field):
        return field(trace_line)
    return None
| 172 | |
def generate_datafile(datafile_name, trace, fields):
    """Write one tab-separated line per trace record to `datafile_name`.

    The columns of each line are the str() of get_field() for every entry
    of `fields`, in order. An existing file is overwritten.
    """
    with open(datafile_name, 'w') as out:
        for record in trace:
            cells = []
            for field in fields:
                cells.append(str(get_field(record, field)))
            out.write('\t'.join(cells) + '\n')
| 179 | |
def generate_script_and_datafile(plot, trace, datafile, output):
    """Write the data file for `plot` and return the matching gnuplot script.

    plot     -- iterable of Set/Plot-like objects describing one chart.
    trace    -- records to dump into the data file.
    datafile -- name of the data file to write and reference.
    output   -- name of the PNG file the script tells gnuplot to produce.
    """
    fields, field_to_index = collect_fields(plot)
    generate_datafile(datafile, trace, fields)

    lines = [
        'set terminal png',
        'set output "%s"' % output,
        'set autoscale',
        'set ytics nomirror',
        'set xtics nomirror',
        'set key below',
    ]

    # The secondary y axis is only configured when some series uses it.
    if is_y2_used(plot):
        lines += ['set autoscale y2', 'set y2tics']

    context = Context(datafile, field_to_index)
    lines.extend(item.to_gnuplot(context) for item in plot)

    return '\n'.join(lines)
| 202 | |
def plot_all(plots, trace, prefix):
    """Render every chart in `plots` with gnuplot.

    Chart number N is written to "<prefix>_N.png". All charts share the
    scratch data file '~datafile', which is rewritten for each one.
    Returns the list of PNG file names in chart order.
    """
    charts = []

    for index, plot in enumerate(plots):
        outfilename = "%s_%d.png" % (prefix, index)
        charts.append(outfilename)
        script = generate_script_and_datafile(
            plot, trace, '~datafile', outfilename)
        print('Plotting %s...' % outfilename)
        gnuplot(script)

    return charts
| 214 | |
def reclaimed_bytes(row):
    """Return 'total_size_before' minus 'total_size_after' for one record."""
    before = row['total_size_before']
    after = row['total_size_after']
    return before - after
| 217 | |
def other_scope(r):
    """Return the part of 'pause' not covered by the individual GC phases.

    The 'mark', 'sweep', 'compact' and 'flushcode' values of the record
    are subtracted from its 'pause' value.
    """
    remaining = r['pause']
    for phase in ('mark', 'sweep', 'compact', 'flushcode'):
        remaining -= r[phase]
    return remaining
| 220 | |
# Chart definitions. Each inner list describes one chart: gnuplot `set`
# commands followed by the Plot whose series are drawn.
plots = [
    # Stacked histogram of the time spent in each GC phase.
    [
        Set('style fill solid 0.5 noborder'),
        Set('style histogram rowstacked'),
        Set('style data histograms'),
        Plot(Item('Marking', 'mark', lc = 'purple'),
             Item('Sweep', 'sweep', lc = 'blue'),
             Item('Compaction', 'compact', lc = 'red'),
             Item('Flush Code', 'flushcode', lc = 'yellow'),
             Item('Other', other_scope, lc = 'grey'))
    ],
    # Heap and hole sizes before GC (y2 axis) against the GC pause.
    [
        Set('style histogram rowstacked'),
        Set('style data histograms'),
        Plot(Item('Heap Size (before GC)', 'total_size_before', x1y2,
                  fs = 'solid 0.4 noborder',
                  lc = 'green'),
             # This series plots 'holes_size_before', so it is labelled
             # "before GC" (it used to carry the "after GC" label).
             Item('Total holes (before GC)', 'holes_size_before', x1y2,
                  fs = 'solid 0.4 noborder',
                  lc = 'red'),
             Item('GC Time', ['i', 'pause'], style = 'lines', lc = 'red'))
    ],
    # Heap and hole sizes after GC (y2 axis) against the GC pause.
    [
        Set('style histogram rowstacked'),
        Set('style data histograms'),
        Plot(Item('Heap Size (after GC)', 'total_size_after', x1y2,
                  fs = 'solid 0.4 noborder',
                  lc = 'green'),
             Item('Total holes (after GC)', 'holes_size_after', x1y2,
                  fs = 'solid 0.4 noborder',
                  lc = 'red'),
             Item('GC Time', ['i', 'pause'],
                  style = 'lines',
                  lc = 'red'))
    ],
    # Allocated, reclaimed and promoted amounts per GC.
    [
        Set('style fill solid 0.5 noborder'),
        Set('style data histograms'),
        Plot(Item('Allocated', 'allocated'),
             Item('Reclaimed', reclaimed_bytes),
             Item('Promoted', 'promoted', style = 'lines', lc = 'black'))
    ],
]
| 264 | |
def calc_total(trace, field):
    """Return the sum of `field` over all records in `trace` (0 if empty).

    Raises KeyError if a record has no `field` entry.
    """
    # sum() replaces the hand-rolled reduce(), which is not a builtin on
    # Python 3.
    return sum(row[field] for row in trace)
| 267 | |
def calc_max(trace, field):
    """Return the largest `field` value in `trace`, but never less than 0.

    An empty trace, or one holding only negative values, yields 0. Raises
    KeyError if a record has no `field` entry.
    """
    # max() replaces the hand-rolled reduce(), which is not a builtin on
    # Python 3; the leading 0 keeps the original floor/initial value.
    return max([0] + [row[field] for row in trace])
| 270 | |
def _average(total, count):
    """Return the integer average total / count, or 0 when count is 0."""
    if count == 0:
        return 0
    return total // count


def process_trace(filename):
    """Turn the GC trace `filename` into charts and an HTML summary.

    Writes one PNG per chart ("<filename>_N.png") and "<filename>.html",
    which shows total/max/average timings and embeds the charts.
    """
    import os.path  # Local import: only needed for the <img> references.

    trace = parse_gc_trace(filename)
    total_gc = calc_total(trace, 'pause')
    max_gc = calc_max(trace, 'pause')
    # _average() avoids ZeroDivisionError when no usable record was found.
    avg_gc = _average(total_gc, len(trace))

    total_sweep = calc_total(trace, 'sweep')
    max_sweep = calc_max(trace, 'sweep')

    total_mark = calc_total(trace, 'mark')
    max_mark = calc_max(trace, 'mark')

    # A real list (len() is needed below); .get() tolerates records that
    # carry no 'gc' entry.
    scavenges = [r for r in trace if r.get('gc') == 's']
    total_scavenge = calc_total(scavenges, 'pause')
    max_scavenge = calc_max(scavenges, 'pause')
    avg_scavenge = _average(total_scavenge, len(scavenges))

    charts = plot_all(plots, trace, filename)

    # One inner list per table cell, one (label, value) pair per line.
    summary = [
        [('Total in GC', total_gc),
         ('Max in GC', max_gc),
         ('Avg in GC', avg_gc)],
        [('Total in Scavenge', total_scavenge),
         ('Max in Scavenge', max_scavenge),
         ('Avg in Scavenge', avg_scavenge)],
        [('Total in Sweep', total_sweep),
         ('Max in Sweep', max_sweep)],
        [('Total in Mark', total_mark),
         ('Max in Mark', max_mark)],
    ]

    with open(filename + '.html', 'w') as out:
        out.write('<html><body>')
        out.write('<table><tr><td>')
        out.write('</td><td>'.join(
            ''.join('%s: <b>%d</b><br/>' % stat for stat in cell)
            for cell in summary))
        out.write('</td></tr></table>')
        for chart in charts:
            # The page is written next to the PNGs, so reference them by
            # bare file name; keeping the directory part would break the
            # link for relative trace paths.
            out.write('<img src="%s">' % os.path.basename(chart))
        out.write('</body></html>')

    print("%s generated." % (filename + '.html'))
| 312 | |
# Script entry: exactly one argument, the GC trace file, is expected.
if len(sys.argv) == 2:
    process_trace(sys.argv[1])
else:
    print("Usage: %s <GC-trace-filename>" % sys.argv[0])
    sys.exit(1)